author     Brian Sullivan <briansul@microsoft.com>    2015-12-11 16:16:44 -0800
committer  Brian Sullivan <briansul@microsoft.com>    2015-12-11 16:16:44 -0800
commit     121d095ed0b0076fb1c7ff59e6446fd19d506b32 (patch)
tree       4a690f67e0117dd346a9de1937f018918a970ae9 /src
parent     f05270a77a9782c5960d1bdff82b8521b1e3fa5d (diff)
Port of all JIT changes from the .NET Framework 4.6.1 release
.NET Framework 4.6.1 announcement:
http://blogs.msdn.com/b/dotnet/archive/2015/11/30/net-framework-4-6-1-is-now-available.aspx
Full list of .NET Framework 4.6.1 changes:
https://github.com/Microsoft/dotnet/blob/master/releases/net461/dotnet461-changes.md
Additional changes include:
- Working ARM64 JIT compiler
- Additional JIT Optimizations
o Tail call recursion optimization (see the sketch after this list)
o Array length tracking optimization
o CSE for widening casts
o Smaller encoding for RIP relative and absolute addresses in addressing modes
o Tracked local variable count increased to 512
o Improved handling of the System.GetType() intrinsic
o Improved handling of Math intrinsics
- Work for the X86 Ryu-JIT compiler
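
The tail call recursion optimization called out above (surfaced in the diff below as the TailCallLoopOpt config value, "Convert recursive tail calls to loops") turns a self-recursive call in tail position into a jump back to the start of the method, so deep recursion no longer grows the stack. A minimal sketch of the shape of that transformation, written in C++ purely for illustration (the factorial helpers and their names are assumptions, not code from this change):

#include <cstdint>

// Tail-recursive form: the recursive call is the last action taken,
// so nothing needs to stay live across it.
uint64_t FactorialRec(uint64_t n, uint64_t acc)
{
    if (n <= 1)
        return acc;
    return FactorialRec(n - 1, acc * n);   // self-recursive tail call
}

// Shape after the transformation: the call becomes a parameter update
// followed by a branch back to the top of the method.
uint64_t FactorialLoop(uint64_t n, uint64_t acc)
{
    while (n > 1)
    {
        acc *= n;   // same values the tail call would have passed
        n   -= 1;
    }
    return acc;
}

Both versions compute the same result; the second simply reuses the current frame, which is what the JIT does for the recursive case when TailCallLoopOpt is enabled (its default in the diff below is 1).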
[tfs-changeset: 1557101]
Diffstat (limited to 'src')
103 files changed, 15599 insertions, 6840 deletions
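
Much of the interface churn in the diff below is tied to JIT/EE interface versioning: the #JITEEVersionIdentifier GUID block moves from corjit.h into corinfo.h, and additions such as CORINFO_HELP_READYTORUN_DELEGATE_CTOR and the new throw helpers are wrapped in COR_JIT_EE_VERSION > 460. The GUID is the value returned by ICorJitCompiler::getVersionIdentifier(), so a clrjit built against a different interface can be rejected at load time. A minimal sketch of such a check, assuming the corjit.h/corinfo.h from this change are on the include path (only getJit(), getVersionIdentifier() and JITEEVersionIdentifier come from those headers; the wrapper function itself is hypothetical):

#include <cstring>      // memcmp
#include "corjit.h"     // getJit(), ICorJitCompiler; JITEEVersionIdentifier is
                        // assumed to be visible via the corinfo.h it pulls in

// Hypothetical EE-side check: accept the JIT only if it was built against
// the same JIT/EE interface GUID as this EE.
bool JitMatchesThisEE()
{
    // getJit() is the entry point exported by clrjit; a dynamically loaded
    // JIT would normally be resolved via GetProcAddress instead.
    ICorJitCompiler* jit = getJit();
    if (jit == nullptr)
        return false;

    GUID jitVersion;
    jit->getVersionIdentifier(&jitVersion);   // OUT: the JIT's interface GUID

    // Byte-for-byte comparison against the GUID this EE was compiled with.
    return memcmp(&jitVersion, &JITEEVersionIdentifier, sizeof(GUID)) == 0;
}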
diff --git a/src/dirs.proj b/src/dirs.proj index 7cc1ed0513..6eab0285c7 100644 --- a/src/dirs.proj +++ b/src/dirs.proj @@ -42,13 +42,8 @@ <ProjectFile Include="dlls\dirs.proj" /> <ProjectFile Include="unwinder\dirs.proj" Condition="'$(TargetArch)' != 'i386'" /> - <!-- In the CodeGen branch, we use RyuJIT for all JIT builds --> - <ProjectFile Include="jit\dirs.proj" Condition="'$(_BuildBranch)' == 'CodeGen'" /> - - <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 --> - <ProjectFile Include="jit\dirs.proj" Condition="('$(_BuildBranch)' != 'CodeGen') and ('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64')" /> - <ProjectFile Include="jit32\dirs.proj" Condition="('$(_BuildBranch)' != 'CodeGen') and ('$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64')" /> - + <ProjectFile Include="jit\dirs.proj" /> + <ProjectFile Include="jit32\dirs.proj" Condition="'$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64'" /> <ProjectFile Include="jit64\dirs.proj" Condition="'$(BuildProjectName)' != 'CoreSys'" /> <ProjectFile Include="tools\dirs.proj" /> diff --git a/src/dlls/mscoree/mscoree.settings.targets b/src/dlls/mscoree/mscoree.settings.targets index 2931de5da7..6aa31b990f 100644 --- a/src/dlls/mscoree/mscoree.settings.targets +++ b/src/dlls/mscoree/mscoree.settings.targets @@ -94,12 +94,9 @@ <ItemGroup Condition="'$(FeatureMergeJitAndEngine)' == 'true'"> <ImportLib Include="$(ClrLibPath)\clrjit.lib" /> - <!-- In the CodeGen branch, we use RyuJIT for all JIT builds --> - <ProjectReference Condition="'$(_BuildBranch)' == 'CodeGen'" Include="$(ClrSrcDirectory)jit\dll\jit.nativeproj" /> - - <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 --> - <ProjectReference Condition="'$(_BuildBranch)' != 'CodeGen' and ('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64')" Include="$(ClrSrcDirectory)jit\dll\jit.nativeproj" /> - <ProjectReference Condition="'$(_BuildBranch)' != 'CodeGen' and ('$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64')" Include="$(ClrSrcDirectory)jit32\dll\jit.nativeproj" /> + <!-- We build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 --> + <ProjectReference Condition="'$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64'" Include="$(ClrSrcDirectory)jit\dll\jit.nativeproj" /> + <ProjectReference Condition="'$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64'" Include="$(ClrSrcDirectory)jit32\dll\jit.nativeproj" /> </ItemGroup> <ItemGroup> diff --git a/src/inc/clrconfigvalues.h b/src/inc/clrconfigvalues.h index 062ffb6e01..94379e2868 100644 --- a/src/inc/clrconfigvalues.h +++ b/src/inc/clrconfigvalues.h @@ -386,12 +386,17 @@ RETAIL_CONFIG_DWORD_INFO_EX(INTERNAL_JitEnableNoWayAssert, W("JitEnableNoWayAsse CONFIG_STRING_INFO_EX(INTERNAL_JitDisasm, W("JitDisasm"), "Dumps disassembly for specified method", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_DIRECT_ACCESS(INTERNAL_JitDoubleAlign, W("JitDoubleAlign"), "") CONFIG_STRING_INFO_EX(INTERNAL_JitDump, W("JitDump"), "Dumps trees for specified method", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_JitDumpIR, W("JitDumpIR"), "Dumps trees (in linear IR form) for specified method", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_JitDumpIRFormat, W("JitDumpIRFormat"), "Comma separated format control for JitDumpIR, values = {types | locals | ssa | valnums | kinds | flags | nodes | nolists | 
nostmts | noleafs | trees | dataflow}", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_JitDumpIRPhase, W("JitDumpIRPhase"), "Phase control for JitDumpIR, values = {* | phasename}", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDumpVerboseTrees, W("JitDumpVerboseTrees"), 0, "Enable more verbose tree dumps", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDumpVerboseSsa, W("JitDumpVerboseSsa"), 0, "Produce especially verbose dump output for SSA", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDumpBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0, "If 1, display each tree before/after morphing", CLRConfig::REGUTIL_default) -CONFIG_STRING_INFO_EX(INTERNAL_JitDumpFg, W("JitDumpFg"), "Xml Flowgraph support", CLRConfig::REGUTIL_default) -CONFIG_STRING_INFO_EX(INTERNAL_JitDumpFgDir, W("JitDumpFgDir"), "Xml Flowgraph support", CLRConfig::REGUTIL_default) -CONFIG_STRING_INFO_EX(INTERNAL_JitDumpFgFile, W("JitDumpFgFile"), "Xml Flowgraph support", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_JitDumpFg, W("JitDumpFg"), "Dumps Xml/Dot Flowgraph for specified method", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_JitDumpFgDir, W("JitDumpFgDir"), "Directory for Xml/Dot flowgraph dump(s)", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_JitDumpFgFile, W("JitDumpFgFile"), "Filename for Xml/Dot flowgraph dump(s)", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_JitDumpFgPhase, W("JitDumpFgPhase"), "Phase-based Xml/Dot flowgraph support. Set to the short name of a phase to see the flowgraph after that phase. Leave unset to dump after COLD-BLK (determine first cold block) or set to * for all phases", CLRConfig::REGUTIL_default) +CONFIG_DWORD_INFO_EX(INTERNAL_JitDumpFgDot, W("JitDumpFgDot"), 0, "Set to non-zero to emit Dot instead of Xml Flowgraph dump", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDumpLevel, W("JitDumpLevel"), 1, "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDumpASCII, W("JitDumpASCII"), 1, "Uses only ASCII characters in tree dumps", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDumpTerseLsra, W("JitDumpTerseLsra"), 1, "Produce terse dump output for LSRA", CLRConfig::REGUTIL_default) @@ -413,6 +418,7 @@ CONFIG_STRING_INFO_EX(INTERNAL_JitHalt, W("JitHalt"), "Emits break instruction i CONFIG_DWORD_INFO_EX(INTERNAL_JitHashHalt, W("JitHashHalt"), (DWORD)-1, "Same as JitHalt, but for a method hash", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitHashBreak, W("JitHashBreak"), (DWORD)-1, "Same as JitBreak, but for a method hash", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitHashDump, W("JitHashDump"), (DWORD)-1, "Same as JitDump, but for a method hash", CLRConfig::REGUTIL_default) +CONFIG_DWORD_INFO_EX(INTERNAL_JitHashDumpIR, W("JitHashDumpIR"), (DWORD)-1, "Same as JitDumpIR, but for a method hash", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO(INTERNAL_JitHeartbeat, W("JitHeartbeat"), 0, "") CONFIG_DWORD_INFO(INTERNAL_JitHelperLogging, W("JitHelperLogging"), 0, "") CONFIG_STRING_INFO_EX(INTERNAL_JitImportBreak, W("JitImportBreak"), "", CLRConfig::REGUTIL_default) @@ -439,6 +445,7 @@ RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_AltJitName, W("AltJitName"), "Alternative RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_AltJit, W("AltJit"), "Enables AltJit and selectively limits it to the specified methods.", CLRConfig::REGUTIL_default) RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_AltJitExcludeAssemblies, W("AltJitExcludeAssemblies"), "Do not 
use AltJit on this semicolon-delimited list of assemblies.", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_AltJitLimit, W("AltJitLimit"), 0, "Max number of functions to use altjit for (decimal)", CLRConfig::REGUTIL_default) +CONFIG_DWORD_INFO_EX(INTERNAL_RunAltJitCode, W("RunAltJitCode"), 1, "If non-zero, and the compilation succeeds for an AltJit, then use the code. If zero, then we always throw away the generated code and fall back to the default compiler.", CLRConfig::REGUTIL_default) #endif // defined(ALLOW_SXS_JIT) #if defined(FEATURE_STACK_SAMPLING) @@ -467,7 +474,9 @@ CONFIG_DWORD_INFO_EX(INTERNAL_JitNoCSE, W("JitNoCSE"), 0, "", CLRConfig::REGUTIL CONFIG_DWORD_INFO_EX(INTERNAL_JitNoCSE2, W("JitNoCSE2"), 0, "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitNoHoist, W("JitNoHoist"), 0, "", CLRConfig::REGUTIL_default) -CONFIG_DWORD_INFO_EX(INTERNAL_JitNoInline, W("JitNoInline"), 0, "Disables inlining", CLRConfig::REGUTIL_default) +RETAIL_CONFIG_DWORD_INFO_EX(INTERNAL_JitNoInline, W("JitNoInline"), 0, "Disables inlining of all methods", CLRConfig::REGUTIL_default) +RETAIL_CONFIG_DWORD_INFO_EX(INTERNAL_JitAggressiveInlining, W("JitAggressiveInlining"), 0, "Aggressive inlining of all methods", CLRConfig::REGUTIL_default) + CONFIG_STRING_INFO_EX(INTERNAL_JitNoProcedureSplitting, W("JitNoProcedureSplitting"), "Disallow procedure splitting for specified methods", CLRConfig::REGUTIL_default) CONFIG_STRING_INFO_EX(INTERNAL_JitNoProcedureSplittingEH, W("JitNoProcedureSplittingEH"), "Disallow procedure splitting for specified methods if they contain exception handling", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitNoRegLoc, W("JitNoRegLoc"), 0, "", CLRConfig::REGUTIL_default) @@ -484,6 +493,7 @@ CONFIG_DWORD_INFO_EX(INTERNAL_JitSlowDebugChecksEnabled, W("JitSlowDebugChecksEn CONFIG_DWORD_INFO_EX(INTERNAL_JITPInvokeCheckEnabled, W("JITPInvokeCheckEnabled"), 0, "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO(INTERNAL_JITPInvokeEnabled, W("JITPInvokeEnabled"), 1, "") RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_JitPrintInlinedMethods, W("JitPrintInlinedMethods"), 0, "", CLRConfig::REGUTIL_default) +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitTelemetry, W("JitTelemetry"), 1, "If non-zero, gather JIT telemetry data") CONFIG_STRING_INFO_EX(INTERNAL_JitRange, W("JitRange"), "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JITRequired, W("JITRequired"), (unsigned)-1, "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_DIRECT_ACCESS(INTERNAL_JITRoundFloat, W("JITRoundFloat"), "") @@ -498,6 +508,7 @@ CONFIG_STRING_INFO_EX(INTERNAL_JitStressModeNamesNot, W("JitStressModeNamesNot") CONFIG_STRING_INFO_EX(INTERNAL_JitStressOnly, W("JitStressOnly"), "Internal Jit stress mode: stress only the specified method(s)", CLRConfig::REGUTIL_default) CONFIG_STRING_INFO_EX(INTERNAL_JitStressRange, W("JitStressRange"), "Internal Jit stress mode", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitStressRegs, W("JitStressRegs"), 0, "", CLRConfig::REGUTIL_default) +CONFIG_DWORD_INFO_EX(INTERNAL_JitStressBiasedCSE, W("JitStressBiasedCSE"), 0x101, "Internal Jit stress mode: decimal bias value between (0,100) to perform CSE on a candidate. 100% = All CSEs. 0% = 0 CSE. 
(> 100) means no stress.", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitStrictCheckForNonVirtualCallToVirtualMethod, W("JitStrictCheckForNonVirtualCallToVirtualMethod"), 1, "", CLRConfig::REGUTIL_default) RETAIL_CONFIG_STRING_INFO(INTERNAL_JitTimeLogFile, W("JitTimeLogFile"), "If set, gather JIT throughput data and write to this file.") RETAIL_CONFIG_STRING_INFO(INTERNAL_JitTimeLogCsv, W("JitTimeLogCsv"), "If set, gather JIT throughput data and write to a CSV file. This mode must be used in internal retail builds.") @@ -509,8 +520,10 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitLockWrite, W("JitLockWrite"), 0, "Force all CONFIG_STRING_INFO_EX(INTERNAL_TailCallMax, W("TailCallMax"), "", CLRConfig::REGUTIL_default) RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_TailCallOpt, W("TailCallOpt"), "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_TailcallStress, W("TailcallStress"), 0, "", CLRConfig::REGUTIL_default) +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TailCallLoopOpt, W("TailCallLoopOpt"), 1, "Convert recursive tail calls to loops") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Jit_NetFx40PInvokeStackResilience, W("NetFx40_PInvokeStackResilience"), (DWORD)-1, "Makes P/Invoke resilient against mismatched signature and calling convention (significant perf penalty).") CONFIG_DWORD_INFO_EX(INTERNAL_JitDoSsa, W("JitDoSsa"), 1, "Perform Static Single Assignment (SSA) numbering on the variables", CLRConfig::REGUTIL_default) +CONFIG_DWORD_INFO_EX(INTERNAL_JitDoEarlyProp, W("JitDoEarlyProp"), 1, "Perform Early Value Propagataion", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDoValueNumber, W("JitDoValueNumber"), 1, "Perform value numbering on method expressions", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDoLoopHoisting, W("JitDoLoopHoisting"), 1, "Perform loop hoisting on loop invariant values", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_JitDoCopyProp, W("JitDoCopyProp"), 1, "Perform copy propagation on variables that appear redundant", CLRConfig::REGUTIL_default) @@ -544,6 +557,10 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitVNMapSelBudget, W("JitVNMapSelBudget"), 100 RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_FeatureSIMD, W("FeatureSIMD"), EXTERNAL_FeatureSIMD_Default, "Enable SIMD support with companion SIMDVector.dll", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_EnableAVX, W("EnableAVX"), EXTERNAL_JitEnableAVX_Default, "Enable AVX instruction set for wide operations as default", CLRConfig::REGUTIL_default) +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) +CONFIG_DWORD_INFO_EX(INTERNAL_JitEnablePCRelAddr, W("JitEnablePCRelAddr"), 1, "Whether absolute addr be encoded as PC-rel offset by RyuJIT where possible", CLRConfig::REGUTIL_default) +#endif //_TARGET_X86_ || _TARGET_AMD64_ + #ifdef FEATURE_MULTICOREJIT RETAIL_CONFIG_STRING_INFO(INTERNAL_MultiCoreJitProfile, W("MultiCoreJitProfile"), "If set, use the file to store/control multi-core JIT.") @@ -714,12 +731,16 @@ RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_NGenDeferAllCompiles, W("NGenDeferAllCompil RETAIL_CONFIG_DWORD_INFO_EX(UNSUPPORTED_NGenDependencyWorkerHang, W("NGenDependencyWorkerHang"), 0, "If set to 1, NGen dependency walk worker process hangs forever", CLRConfig::REGUTIL_default) CONFIG_STRING_INFO_EX(INTERNAL_NgenDisasm, W("NgenDisasm"), "Same as JitDisasm, but for ngen", CLRConfig::REGUTIL_default) CONFIG_STRING_INFO_EX(INTERNAL_NgenDump, W("NgenDump"), "Same as JitDump, but for ngen", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_NgenDumpIR, W("NgenDumpIR"), "Same as 
JitDumpIR, but for ngen", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_NgenDumpIRFormat, W("NgenDumpIRFormat"), "Same as JitDumpIRFormat, but for ngen", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(INTERNAL_NgenDumpIRPhase, W("NgenDumpIRPhase"), "Same as JitDumpIRPhase, but for ngen", CLRConfig::REGUTIL_default) CONFIG_STRING_INFO_EX(INTERNAL_NgenDumpFg, W("NgenDumpFg"), "Ngen Xml Flowgraph support", CLRConfig::REGUTIL_default) CONFIG_STRING_INFO_EX(INTERNAL_NgenDumpFgDir, W("NgenDumpFgDir"), "Ngen Xml Flowgraph support", CLRConfig::REGUTIL_default) CONFIG_STRING_INFO_EX(INTERNAL_NgenDumpFgFile, W("NgenDumpFgFile"), "Ngen Xml Flowgraph support", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO_EX(UNSUPPORTED_NGenFramed, W("NGenFramed"), -1, "same as JitFramed, but for ngen", CLRConfig::REGUTIL_default) CONFIG_STRING_INFO_EX(INTERNAL_NgenGCDump, W("NgenGCDump"), "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_NgenHashDump, W("NgenHashDump"), (DWORD)-1, "same as JitHashDump, but for ngen", CLRConfig::REGUTIL_default) +CONFIG_DWORD_INFO_EX(INTERNAL_NgenHashDumpIR, W("NgenHashDumpIR"), (DWORD)-1, "same as JitHashDumpIR, but for ngen", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_NGENInjectFailuresServiceOnly, W("NGENInjectFailuresServiceOnly"), 1, "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_NGENInjectPerAssemblyFailure, W("NGENInjectPerAssemblyFailure"), 0, "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_EX(INTERNAL_NGENInjectTransientFailure, W("NGENInjectTransientFailure"), 0, "", CLRConfig::REGUTIL_default) diff --git a/src/inc/corinfo.h b/src/inc/corinfo.h index cc2ce720b8..442721b582 100644 --- a/src/inc/corinfo.h +++ b/src/inc/corinfo.h @@ -189,6 +189,77 @@ TODO: Talk about initializing strutures before use #include <corhdr.h> #include <specstrings.h> +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +// +// #JITEEVersionIdentifier +// +// This GUID represents the version of the JIT/EE interface. Any time the interface between the JIT and +// the EE changes (by adding or removing methods to any interface shared between them), this GUID should +// be changed. This is the identifier verified by ICorJitCompiler::getVersionIdentifier(). +// +// You can use "uuidgen.exe -s" to generate this value. +// +// **** NOTE TO INTEGRATORS: +// +// If there is a merge conflict here, because the version changed in two different places, you must +// create a **NEW** GUID, not simply choose one or the other! +// +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +// +////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#if !defined(SELECTANY) + #define SELECTANY extern __declspec(selectany) +#endif + +// COR_JIT_EE_VERSION is a #define that specifies a JIT-EE version, but on a less granular basis than the GUID. +// The #define is intended to be used on a per-product basis. That is, for each release that we support a JIT +// CTP build, we'll update the COR_JIT_EE_VERSION. The GUID must change any time any part of the interface changes. +// +// COR_JIT_EE_VERSION is set, by convention, to a number related to the the product number. So, 460 is .NET 4.60. +// 461 would indicate .NET 4.6.1. Etc. 
+// +// Note that the EE should always build with the most current (highest numbered) version. Only the JIT will +// potentially build with a lower version number. In that case, the COR_JIT_EE_VERSION will be specified in the +// CTP JIT build project, such as ctpjit.nativeproj. + +#if !defined(COR_JIT_EE_VERSION) +#define COR_JIT_EE_VERSION 999999999 // This means we'll take everything in the interface +#endif + +#if COR_JIT_EE_VERSION > 460 + +// Update this one +SELECTANY const GUID JITEEVersionIdentifier = { /* f7be09f3-9ca7-42fd-b0ca-f97c0499f5a3 */ + 0xf7be09f3, + 0x9ca7, + 0x42fd, + {0xb0, 0xca, 0xf9, 0x7c, 0x04, 0x99, 0xf5, 0xa3} +}; + +#else + +// ************ Leave this one alone *************** +// We need it to build a .NET 4.6 compatible JIT for the RyuJIT CTP releases +SELECTANY const GUID JITEEVersionIdentifier = { /* 9110edd8-8fc3-4e3d-8ac9-12555ff9be9c */ + 0x9110edd8, + 0x8fc3, + 0x4e3d, + { 0x8a, 0xc9, 0x12, 0x55, 0x5f, 0xf9, 0xbe, 0x9c } +}; + +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// END JITEEVersionIdentifier +// +////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#if COR_JIT_EE_VERSION > 460 + // For System V on the CLR type system number of registers to pass in and return a struct is the same. // The CLR type system allows only up to 2 eightbytes to be passed in registers. There is no SSEUP classification types. #define CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS 2 @@ -263,6 +334,8 @@ private: } }; +#endif // COR_JIT_EE_VERSION + // CorInfoHelpFunc defines the set of helpers (accessed via the ICorDynamicInfo::getHelperFtn()) // These helpers can be called by native code which executes in the runtime. // Compilers can emit calls to these helpers. @@ -329,9 +402,8 @@ enum CorInfoHelpFunc CORINFO_HELP_NEWARR_1_ALIGN8, // like VC, but aligns the array start CORINFO_HELP_STRCNS, // create a new string literal -#if !defined(RYUJIT_CTPBUILD) CORINFO_HELP_STRCNS_CURRENT_MODULE, // create a new string literal from the current module (used by NGen code) -#endif + /* Object model */ CORINFO_HELP_INITCLASS, // Initialize class if not already initialized @@ -369,9 +441,9 @@ enum CorInfoHelpFunc CORINFO_HELP_RNGCHKFAIL, // array bounds check failed CORINFO_HELP_OVERFLOW, // throw an overflow exception CORINFO_HELP_THROWDIVZERO, // throw a divide by zero exception -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 CORINFO_HELP_THROWNULLREF, // throw a null reference exception -#endif +#endif // COR_JIT_EE_VERSION CORINFO_HELP_INTERNALTHROW, // Support for really fast jit CORINFO_HELP_VERIFICATION, // Throw a VerificationException @@ -509,9 +581,6 @@ enum CorInfoHelpFunc // These helpers are required for MDIL backward compatibility only. They are not used by current JITed code. 
CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPEHANDLE_OBSOLETE, // Convert from a TypeHandle (native structure pointer) to RuntimeTypeHandle at run-time -#if defined(RYUJIT_CTPBUILD) - CORINFO_HELP_METHODDESC_TO_RUNTIMEMETHODHANDLE_MAYBENULL_OBSOLETE, // Convert from a MethodDesc (native structure pointer) to RuntimeMethodHandle at run-time -#endif CORINFO_HELP_METHODDESC_TO_RUNTIMEMETHODHANDLE_OBSOLETE, // Convert from a MethodDesc (native structure pointer) to RuntimeMethodHandle at run-time CORINFO_HELP_FIELDDESC_TO_RUNTIMEFIELDHANDLE_OBSOLETE, // Convert from a FieldDesc (native structure pointer) to RuntimeFieldHandle at run-time @@ -523,7 +592,6 @@ enum CorInfoHelpFunc CORINFO_HELP_VIRTUAL_FUNC_PTR, // look up a virtual method at run-time //CORINFO_HELP_VIRTUAL_FUNC_PTR_LOG, // look up a virtual method at run-time, with IBC logging -#ifndef RYUJIT_CTPBUILD // Not a real helpers. Instead of taking handle arguments, these helpers point to a small stub that loads the handle argument and calls the static helper. CORINFO_HELP_READYTORUN_NEW, CORINFO_HELP_READYTORUN_NEWARR_1, @@ -531,8 +599,12 @@ enum CorInfoHelpFunc CORINFO_HELP_READYTORUN_CHKCAST, CORINFO_HELP_READYTORUN_STATIC_BASE, CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR, + +#if COR_JIT_EE_VERSION > 460 CORINFO_HELP_READYTORUN_DELEGATE_CTOR, -#endif +#else + #define CORINFO_HELP_READYTORUN_DELEGATE_CTOR CORINFO_HELP_EE_PRESTUB +#endif // COR_JIT_EE_VERSION #ifdef REDHAWK // these helpers are arbitrary since we don't have any relation to the actual CLR corinfo.h. @@ -618,7 +690,7 @@ enum CorInfoHelpFunc CORINFO_HELP_LOOP_CLONE_CHOICE_ADDR, // Return the reference to a counter to decide to take cloned path in debug stress. CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, // Print a message that a loop cloning optimization has occurred in debug mode. 
-#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 CORINFO_HELP_THROW_ARGUMENTEXCEPTION, // throw ArgumentException CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION, // throw ArgumentOutOfRangeException #endif @@ -1184,6 +1256,19 @@ enum CorInfoIntrinsics CORINFO_INTRINSIC_Sqrt, CORINFO_INTRINSIC_Abs, CORINFO_INTRINSIC_Round, + CORINFO_INTRINSIC_Cosh, + CORINFO_INTRINSIC_Sinh, + CORINFO_INTRINSIC_Tan, + CORINFO_INTRINSIC_Tanh, + CORINFO_INTRINSIC_Asin, + CORINFO_INTRINSIC_Acos, + CORINFO_INTRINSIC_Atan, + CORINFO_INTRINSIC_Atan2, + CORINFO_INTRINSIC_Log10, + CORINFO_INTRINSIC_Pow, + CORINFO_INTRINSIC_Exp, + CORINFO_INTRINSIC_Ceiling, + CORINFO_INTRINSIC_Floor, CORINFO_INTRINSIC_GetChar, // fetch character out of string CORINFO_INTRINSIC_Array_GetDimLength, // Get number of elements in a given dimension of an array CORINFO_INTRINSIC_Array_Get, // Get the value of an element in an array @@ -1898,9 +1983,7 @@ struct CORINFO_CALL_INFO CORINFO_LOOKUP codePointerLookup; }; -#ifndef RYUJIT_CTPBUILD CORINFO_CONST_LOOKUP instParamLookup; // Used by Ready-to-Run -#endif }; //---------------------------------------------------------------------------- @@ -1909,9 +1992,7 @@ struct CORINFO_CALL_INFO enum CORINFO_FIELD_ACCESSOR { CORINFO_FIELD_INSTANCE, // regular instance field at given offset from this-ptr -#ifndef RYUJIT_CTPBUILD CORINFO_FIELD_INSTANCE_WITH_BASE, // instance field with base offset (used by Ready-to-Run) -#endif CORINFO_FIELD_INSTANCE_HELPER, // instance field accessed using helper (arguments are this, FieldDesc * and the value) CORINFO_FIELD_INSTANCE_ADDR_HELPER, // instance field accessed using address-of helper (arguments are this and FieldDesc *) @@ -1956,9 +2037,7 @@ struct CORINFO_FIELD_INFO CorInfoIsAccessAllowedResult accessAllowed; CORINFO_HELPER_DESC accessCalloutHelper; -#ifndef RYUJIT_CTPBUILD CORINFO_CONST_LOOKUP fieldLookup; // Used by Ready-to-Run -#endif }; //---------------------------------------------------------------------------- @@ -2027,10 +2106,8 @@ struct CORINFO_EE_INFO unsigned offsetOfTransparentProxyRP; unsigned offsetOfRealProxyServer; -#ifndef RYUJIT_CTPBUILD // Array offsets unsigned offsetOfObjArrayData; -#endif CORINFO_OS osType; unsigned osMajor; @@ -2120,9 +2197,6 @@ struct CORINFO_RefArray : public CORINFO_Object #ifdef _WIN64 unsigned alignpad; #endif // _WIN64 -#if defined(RYUJIT_CTPBUILD) - CORINFO_CLASS_HANDLE cls; -#endif #if 0 /* Multi-dimensional arrays have the lengths and bounds here */ @@ -2628,13 +2702,11 @@ public: CORINFO_METHOD_HANDLE method ) = 0; -#ifndef RYUJIT_CTPBUILD // Is the given module the System.Numerics.Vectors module? // This defaults to false. virtual bool isInSIMDModule( CORINFO_CLASS_HANDLE classHnd ) { return false; } -#endif // RYUJIT_CTPBUILD // return the unmanaged calling convention for a PInvoke virtual CorInfoUnmanagedCallConv getUnmanagedCallConv( @@ -3029,13 +3101,11 @@ public: CORINFO_CLASS_HANDLE cls ) = 0; -#ifndef RYUJIT_CTPBUILD virtual void getReadyToRunHelper( CORINFO_RESOLVED_TOKEN * pResolvedToken, CorInfoHelpFunc id, CORINFO_CONST_LOOKUP * pLookup ) = 0; -#endif virtual const char* getHelperName( CorInfoHelpFunc @@ -3371,12 +3441,6 @@ public: // Returns name of the JIT timer log virtual LPCWSTR getJitTimeLogFilename() = 0; -#ifdef RYUJIT_CTPBUILD - // Logs a SQM event for a JITting a very large method. 
- virtual void logSQMLongJitEvent(unsigned mcycles, unsigned msec, unsigned ilSize, unsigned numBasicBlocks, bool minOpts, - CORINFO_METHOD_HANDLE methodHnd) = 0; -#endif // RYUJIT_CTPBUILD - /*********************************************************************************/ // // Diagnostic methods @@ -3412,13 +3476,14 @@ public: size_t FQNameCapacity /* IN */ ) = 0; +#if COR_JIT_EE_VERSION > 460 + // returns whether the struct is enregisterable. Only valid on a System V VM. Returns true on success, false on failure. virtual bool getSystemVAmd64PassStructInRegisterDescriptor( /* IN */ CORINFO_CLASS_HANDLE structHnd, /* OUT */ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr ) = 0; -#if !defined(RYUJIT_CTPBUILD) /*************************************************************************/ // // Configuration values - Allows querying of the CLR configuration. @@ -3445,7 +3510,9 @@ public: virtual void freeStringConfigValue( __in_z wchar_t *value ) = 0; -#endif // !RYUJIT_CTPBUILD + +#endif // COR_JIT_EE_VERSION + }; /***************************************************************************** @@ -3521,23 +3588,12 @@ public: void **ppIndirection = NULL ) = 0; -#if defined(RYUJIT_CTPBUILD) - // These entry points must be called if a handle is being embedded in - // the code to be passed to a JIT helper function. (as opposed to just - // being passed back into the ICorInfo interface.) - - // a module handle may not always be available. A call to embedModuleHandle should always - // be preceeded by a call to canEmbedModuleHandleForHelper. A dynamicMethod does not have a module - virtual bool canEmbedModuleHandleForHelper( - CORINFO_MODULE_HANDLE handle - ) = 0; -#else // get slow lazy string literal helper to use (CORINFO_HELP_STRCNS*). // Returns CORINFO_HELP_UNDEF if lazy string literal helper cannot be used. virtual CorInfoHelpFunc getLazyStringLiteralHelper( CORINFO_MODULE_HANDLE handle ) = 0; -#endif + virtual CORINFO_MODULE_HANDLE embedModuleHandle( CORINFO_MODULE_HANDLE handle, void **ppIndirection = NULL diff --git a/src/inc/corjit.h b/src/inc/corjit.h index a470d5bff8..8612d954df 100644 --- a/src/inc/corjit.h +++ b/src/inc/corjit.h @@ -334,52 +334,6 @@ struct IEEMemoryManager; extern "C" ICorJitCompiler* __stdcall getJit(); -////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE -// -// #JITEEVersionIdentifier -// -// This GUID represents the version of the JIT/EE interface. Any time the interface between the JIT and -// the EE changes (by adding or removing methods to any interface shared between them), this GUID should -// be changed. This is the identifier verified by ICorJitCompiler::getVersionIdentifier(). -// -// You can use "uuidgen.exe -s" to generate this value. -// -// **** NOTE TO INTEGRATORS: -// -// If there is a merge conflict here, because the version changed in two different places, you must -// create a **NEW** GUID, not simply choose one or the other! 
-// -// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE -// -////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#if !defined(SELECTANY) - #define SELECTANY extern __declspec(selectany) -#endif - -#if !defined(RYUJIT_CTPBUILD) - -// Update this one -SELECTANY const GUID JITEEVersionIdentifier = { /* f7be09f3-9ca7-42fd-b0ca-f97c0499f5a3 */ - 0xf7be09f3, - 0x9ca7, - 0x42fd, - {0xb0, 0xca, 0xf9, 0x7c, 0x04, 0x99, 0xf5, 0xa3} -}; - -#else -// Leave this one alone -// We need it to build a .NET 4.5.1 compatible JIT for the RyuJIT CTP releases -SELECTANY const GUID JITEEVersionIdentifier = { /* 72d8f09d-1052-4466-94e9-d095b370bdae */ - 0x72d8f09d, - 0x1052, - 0x4466, - {0x94, 0xe9, 0xd0, 0x95, 0xb3, 0x70, 0xbd, 0xae} -}; -#endif - // #EEToJitInterface // ICorJitCompiler is the interface that the EE uses to get IL bytecode converted to native code. Note that // to accomplish this the JIT has to call back to the EE to get symbolic information. The code:ICorJitInfo @@ -428,7 +382,6 @@ public: GUID* versionIdentifier /* OUT */ ) = 0; -#ifndef RYUJIT_CTPBUILD // When the EE loads the System.Numerics.Vectors assembly, it asks the JIT what length (in bytes) of // SIMD vector it supports as an intrinsic type. Zero means that the JIT does not support SIMD // intrinsics, so the EE should use the default size (i.e. the size of the IL implementation). @@ -441,7 +394,6 @@ public: // ICorJitCompiler implementation. If 'realJitCompiler' is nullptr, then the JIT should resume // executing all the functions itself. virtual void setRealJit(ICorJitCompiler* realJitCompiler) { } -#endif // !RYUJIT_CTPBUILD }; @@ -579,7 +531,6 @@ public: ULONG * numRuns ) = 0; -#if !defined(RYUJIT_CTPBUILD) // Associates a native call site, identified by its offset in the native code stream, with // the signature information and method handle the JIT used to lay out the call site. If // the call site has no signature information (e.g. a helper call) or has no method handle @@ -589,7 +540,6 @@ public: CORINFO_SIG_INFO * callSig, /* IN */ CORINFO_METHOD_HANDLE methodHandle /* IN */ ) = 0; -#endif // !defined(RYUJIT_CTPBUILD) // A relocation is recorded if we are pre-jitting. // A jump thunk may be inserted if we are jitting diff --git a/src/inc/jithelpers.h b/src/inc/jithelpers.h index f591564d61..bd3ad544fb 100644 --- a/src/inc/jithelpers.h +++ b/src/inc/jithelpers.h @@ -5,6 +5,20 @@ // // Allow multiple inclusion. +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +// +// The JIT/EE interface is versioned. By "interface", we mean mean any and all communication between the +// JIT and the EE. Any time a change is made to the interface, the JIT/EE interface version identifier +// must be updated. See code:JITEEVersionIdentifier for more information. +// +// THIS FILE IS PART OF THE JIT-EE INTERFACE. 
+// +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +// +////////////////////////////////////////////////////////////////////////////////////////////////////////// + #ifndef DYNAMICJITHELPER //I should never try to generate an alignment stub for a dynamic helper @@ -93,10 +107,7 @@ DYNAMICJITHELPER1(CORINFO_HELP_NEWARR_1_ALIGN8, JIT_NewArr1,CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_STRCNS, JIT_StrCns, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_STRCNS) -// Any new jit helpers need to be removed for the RYUJIT_CTPBUILD so we can build a 4.5.1 compatible JIT -#if !defined(RYUJIT_CTPBUILD) JITHELPER1(CORINFO_HELP_STRCNS_CURRENT_MODULE, NULL, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) -#endif // Object model JITHELPER1(CORINFO_HELP_INITCLASS, JIT_InitClass, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_INITCLASS) @@ -132,9 +143,9 @@ JITHELPER1(CORINFO_HELP_RNGCHKFAIL, JIT_RngChkFail, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_RNGCHKFAIL) JITHELPER1(CORINFO_HELP_OVERFLOW, JIT_Overflow, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_OVERFLOW) JITHELPER1(CORINFO_HELP_THROWDIVZERO, JIT_ThrowDivZero, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 JITHELPER1(CORINFO_HELP_THROWNULLREF, JIT_ThrowNullRef, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) -#endif +#endif // COR_JIT_EE_VERSION JITHELPER1(CORINFO_HELP_INTERNALTHROW, JIT_InternalThrow, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_INTERNALTHROW) JITHELPER1(CORINFO_HELP_VERIFICATION, IL_VerificationError,CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_VERIFICATION) JITHELPER1(CORINFO_HELP_SEC_UNMGDCODE_EXCPT, JIT_SecurityUnmanagedCodeException, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_SEC_UNMGDCODE_EXCPT) @@ -222,12 +233,17 @@ #else JITHELPER1(CORINFO_HELP_GETSTATICFIELDADDR_CONTEXT, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB, MDIL_HELP_GETSTATICFIELDADDR_CONTEXT) #endif + +#if COR_JIT_EE_VERSION > 460 #ifdef FEATURE_MIXEDMODE // TLS JITHELPER1(CORINFO_HELP_GETSTATICFIELDADDR_TLS, JIT_GetStaticFieldAddr_Tls,CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_GETSTATICFIELDADDR_TLS) #else // FEATURE_MIXEDMODE JITHELPER1(CORINFO_HELP_GETSTATICFIELDADDR_TLS, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB, MDIL_HELP_GETSTATICFIELDADDR_TLS) #endif // FEATURE_MIXEDMODE +#else // COR_JIT_EE_VERSION + JITHELPER1(CORINFO_HELP_GETSTATICFIELDADDR_TLS, JIT_GetStaticFieldAddr_Tls,CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_GETSTATICFIELDADDR_TLS) +#endif // COR_JIT_EE_VERSION JITHELPER1(CORINFO_HELP_GETGENERICS_GCSTATIC_BASE, JIT_GetGenericsGCStaticBase,CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE, JIT_GetGenericsNonGCStaticBase,CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) @@ -294,13 +310,6 @@ JITHELPER1(CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG,JIT_GenericHandleMethodLogging, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_RUNTIMEHANDLE_METHOD_LOG) JITHELPER1(CORINFO_HELP_RUNTIMEHANDLE_CLASS, JIT_GenericHandleClass, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_RUNTIMEHANDLE_CLASS) JITHELPER1(CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG, JIT_GenericHandleClassLogging, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_RUNTIMEHANDLE_CLASS_LOG) -#if defined(RYUJIT_CTPBUILD) - JITHELPER1(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPEHANDLE, JIT_GetRuntimeTypeHandle, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_TYPEHANDLE_TO_RUNTIMETYPEHANDLE) - // This helper is not needed for MDIL - MDIL does not support IL instructions that this helper is needed for - 
JITHELPER1(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPEHANDLE_MAYBENULL, JIT_GetRuntimeTypeHandle_MaybeNull, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) - JITHELPER1(CORINFO_HELP_METHODDESC_TO_RUNTIMEMETHODHANDLE, JIT_GetRuntimeMethodHandle, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_METHODDESC_TO_RUNTIMEMETHODHANDLE) - JITHELPER1(CORINFO_HELP_FIELDDESC_TO_RUNTIMEFIELDHANDLE, JIT_GetRuntimeFieldHandle, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_FIELDDESC_TO_RUNTIMEFIELDHANDLE) -#else // stuff below is only for post-CTP builds #if defined(FEATURE_CORECLR) && defined(_TARGET_ARM_) // MDIL // These helpers are required for MDIL backward compatibility only. They are not used by current JITed code. JITHELPER1(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPEHANDLE_OBSOLETE, JIT_GetRuntimeTypeHandle, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_TYPEHANDLE_TO_RUNTIMETYPEHANDLE) @@ -311,7 +320,6 @@ JITHELPER1(CORINFO_HELP_METHODDESC_TO_RUNTIMEMETHODHANDLE_OBSOLETE, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_FIELDDESC_TO_RUNTIMEFIELDHANDLE_OBSOLETE, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB, MDIL_HELP_UNDEF) #endif // MDIL -#endif // defined(RYUJIT_CTBUILD) JITHELPER1(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE, JIT_GetRuntimeType, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_TYPEHANDLE_TO_RUNTIMETYPE) // This helper is not needed for MDIL - MDIL does not support IL instructions that this helper is needed for JITHELPER1(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL, JIT_GetRuntimeType_MaybeNull, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) @@ -321,15 +329,15 @@ JITHELPER1(CORINFO_HELP_VIRTUAL_FUNC_PTR, JIT_VirtualFunctionPointer, CORINFO_HELP_SIG_4_STACK, MDIL_HELP_VIRTUAL_FUNC_PTR) //JITHELPER1(CORINFO_HELP_VIRTUAL_FUNC_PTR_LOG,JIT_VirtualFunctionPointerLogging) -#ifndef RYUJIT_CTPBUILD JITHELPER1(CORINFO_HELP_READYTORUN_NEW, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_READYTORUN_NEWARR_1, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_READYTORUN_ISINSTANCEOF, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_READYTORUN_CHKCAST, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_READYTORUN_STATIC_BASE, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB, MDIL_HELP_UNDEF) +#if COR_JIT_EE_VERSION > 460 JITHELPER1(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB, MDIL_HELP_UNDEF) -#endif +#endif // COR_JIT_EE_VERSION JITHELPER1(CORINFO_HELP_EE_PRESTUB, ThePreStub, CORINFO_HELP_SIG_NO_ALIGN_STUB, MDIL_HELP_EE_PRESTUB) @@ -388,10 +396,12 @@ // JbTodo: This helper definition is missing it's MDIL helper counterpart. 
JITHELPER1(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, JIT_DebugLogLoopCloning, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 + JITHELPER1(CORINFO_HELP_THROW_ARGUMENTEXCEPTION, JIT_ThrowArgumentException, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) JITHELPER1(CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION, JIT_ThrowArgumentOutOfRangeException, CORINFO_HELP_SIG_REG_ONLY, MDIL_HELP_UNDEF) -#endif + +#endif // COR_JIT_EE_VERSION #undef JITHELPER1 #undef DYNAMICJITHELPER1 diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt index 35add2e891..b46adec89a 100644 --- a/src/jit/CMakeLists.txt +++ b/src/jit/CMakeLists.txt @@ -18,6 +18,7 @@ set( JIT_SOURCES block.cpp compiler.cpp disasm.cpp + earlyprop.cpp eeinterface.cpp ee_il_dll.cpp jiteh.cpp @@ -31,6 +32,7 @@ set( JIT_SOURCES hashbv.cpp importer.cpp instr.cpp + jittelemetry.cpp lclvars.cpp liveness.cpp morph.cpp @@ -56,6 +58,7 @@ set( JIT_SOURCES codegencommon.cpp assertionprop.cpp rangecheck.cpp + jittelemetry.cpp loopcloning.cpp lower.cpp lsra.cpp diff --git a/src/jit/DIRS.proj b/src/jit/DIRS.proj index bab4035597..3e4c86b5a4 100644 --- a/src/jit/DIRS.proj +++ b/src/jit/DIRS.proj @@ -1,4 +1,5 @@ -<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood"> <!--Import the settings--> <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" /> @@ -17,23 +18,31 @@ </PropertyGroup> <ItemGroup Condition="'$(BuildExePhase)' == '1'"> - <ProjectFile Include="dll\jit.nativeproj" /> + <!-- x86 and ARM clrjit.dll are built in the JIT32 directory; we build FrankenJit here --> + <ProjectFile Condition="'$(BuildArchitecture)' != 'i386' and '$(BuildArchitecture)' != 'arm'" Include="dll\jit.nativeproj" /> + </ItemGroup> + + <!-- Only the main JIT gets built for CoreSys. The other jits (e.g., altjits) do not. --> + <ItemGroup Condition="'$(BuildExePhase)' == '1' and '$(BuildProjectName)' != 'CoreSys'"> + + <!-- Build the "FrankenJit" (RyuJIT front-end, legacy back-end) and "FrankenAltjit". These can't conflict with the names of the JIT32 directory outputs. --> + <ProjectFile Condition="'$(BuildArchitecture)' == 'i386' or '$(BuildArchitecture)' == 'arm'" Include="frankenjit\frankenjit.nativeproj" /> + <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="frankenaltjit\frankenaltjit.nativeproj" /> <!-- This might be useful, to help make sure JIT devs build all configurations of the JIT (including crossgen), but it appears to cause problems with the build system, and it slows down normal JIT developer productivity by adding a seldom-useful build. 
<ProjectFile Condition="'$(MDILGenerator)' != 'true'" Include="crossgen\jit_crossgen.nativeproj" /> --> - <ProjectFile Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'arm'" Include="protojit\protojit.nativeproj" /> - <ProjectFile Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'amd64'" Include="protojit\protojit.nativeproj" /> - <ProjectFile Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'amd64'" Include="ctp\ctpjit.nativeproj" /> - <ProjectFile Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'amd64'" Include="arm64altjit\arm64altjit.nativeproj" /> - <ProjectFile Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'i386'" Include="protojit\protojit.nativeproj" /> - <ProjectFile Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'i386'" Include="armdll\altjit.nativeproj" /> - <ProjectFile Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'i386'" Include="protononjit\protononjit.nativeproj" /> + <ProjectFile Condition="'$(BuildArchitecture)' == 'arm'" Include="protojit\protojit.nativeproj" /> + <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="protojit\protojit.nativeproj" /> + <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="ctp\ctpjit.nativeproj" /> + <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="arm64altjit\arm64altjit.nativeproj" /> + <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="protojit\protojit.nativeproj" /> + <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="protononjit\protononjit.nativeproj" /> <!-- We could build skipjit for all architectures, but we only need it for x86 currently --> - <ProjectFile Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'i386'" Include="skipjit\skipjit.nativeproj" /> + <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="skipjit\skipjit.nativeproj" /> </ItemGroup> <!--Import the targets--> diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp index fccec98089..88921480a0 100644 --- a/src/jit/assertionprop.cpp +++ b/src/jit/assertionprop.cpp @@ -99,11 +99,6 @@ void Compiler::optAddCopies() if (!varDsc->lvIsParam && info.compInitMem) continue; -#ifdef DEBUG - if (lvaTable[lclNum].lvDblWasInt) - continue; -#endif - // On x86 we may want to add a copy for an incoming double parameter // because we can ensure that the copy we make is double aligned // where as we can never ensure the alignment of an incoming double parameter @@ -474,37 +469,82 @@ void Compiler::optAddCopies() } } +//------------------------------------------------------------------------------ +// optVNConstantPropOnTree: Retrieve the assertions on this local variable +// +// Arguments: +// lclNum - The local var id. +// +// Return Value: +// The dependent assertions (assertions using the value of the local var) +// of the local var. +// + +ASSERT_TP& Compiler::GetAssertionDep(unsigned lclNum) +{ + ExpandArray<ASSERT_TP>& dep = *optAssertionDep; + if (dep[lclNum] == NULL) + { + dep[lclNum] = optNewEmptyAssertSet(); + } + return dep[lclNum]; +} /***************************************************************************** * - * Initialize the assertion prop tracking logic. + * Initialize the assertion prop bitset traits and the default bitsets. 
*/ -void Compiler::optAssertionInit(bool isLocalProp) +void Compiler::optAssertionTraitsInit(AssertionIndex assertionCount) { - unsigned lclNum; - LclVarDsc * varDsc; + apTraits = new (getAllocator()) BitVecTraits(assertionCount, this); + apFull = BitVecOps::UninitVal(); + apEmpty = BitVecOps::UninitVal(); + BitVecOps::AssignNoCopy(apTraits, apFull, BitVecOps::MakeFull(apTraits)); + BitVecOps::AssignNoCopy(apTraits, apEmpty, BitVecOps::MakeEmpty(apTraits)); +} - for (lclNum = 0, varDsc = lvaTable; - lclNum < lvaCount; - lclNum++ , varDsc++) - { - varDsc->lvAssertionDep = 0; - } +/***************************************************************************** + * + * Initialize the assertion prop tracking logic. + */ - optAssertionCount = 0; - optAssertionPropagated = false; - optLocalAssertionProp = isLocalProp; - bbJtrueAssertionOut = nullptr; +void Compiler::optAssertionInit(bool isLocalProp) +{ + // Use a function countFunc to determine a proper maximum assertion count for the + // method being compiled. The function is linear to the IL size for small and + // moderate methods. For large methods, considering throughput impact, we track no + // more than 64 assertions. + // Note this tracks at most only 256 assertions. + static const AssertionIndex countFunc[] = { 64, 128, 256, 64 }; + static const unsigned lowerBound = 0; + static const unsigned upperBound = sizeof(countFunc) / sizeof(countFunc[0]) - 1; + const unsigned codeSize = info.compILCodeSize / 512; + optMaxAssertionCount = countFunc[isLocalProp ? lowerBound : min(upperBound, codeSize)]; + + optLocalAssertionProp = isLocalProp; + optAssertionTabPrivate = new (getAllocator()) AssertionDsc[optMaxAssertionCount]; + optComplementaryAssertionMap = new (getAllocator()) AssertionIndex[optMaxAssertionCount](); // zero-inited (NO_ASSERTION_INDEX.) + assert(NO_ASSERTION_INDEX == 0); if (!isLocalProp) { optValueNumToAsserts = new (getAllocator()) ValueNumToAssertsMap(getAllocator()); } + + if (optAssertionDep == NULL) + { + optAssertionDep = new (getAllocator()) ExpandArray<ASSERT_TP>(getAllocator(), max(1, lvaCount)); + } + + optAssertionTraitsInit(optMaxAssertionCount); + optAssertionCount = 0; + optAssertionPropagated = false; + bbJtrueAssertionOut = NULL; } #ifdef DEBUG -void Compiler::optPrintAssertion(AssertionDsc* curAssertion, unsigned assertionIndex /* =0 */) +void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex assertionIndex /* =0 */) { if (curAssertion->op1.kind == O1K_EXACT_TYPE) { @@ -537,6 +577,10 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, unsigned assertion printf("?assertion classification? 
"); } printf("Assertion: "); + if (!optLocalAssertionProp) + { + printf("(%d, %d) ", curAssertion->op1.vn, curAssertion->op2.vn); + } if (!optLocalAssertionProp) { @@ -571,6 +615,11 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, unsigned assertion printf("Loop_Bnd"); vnStore->vnDump(this, curAssertion->op1.vn); } + else if (curAssertion->op1.kind == O1K_CONSTANT_LOOP_BND) + { + printf("Loop_Bnd"); + vnStore->vnDump(this, curAssertion->op1.vn); + } else { printf("?op1.kind?"); @@ -637,6 +686,11 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, unsigned assertion assert(!optLocalAssertionProp); vnStore->vnDump(this, curAssertion->op2.vn); } + else if (curAssertion->op1.kind == O1K_CONSTANT_LOOP_BND) + { + assert(!optLocalAssertionProp); + vnStore->vnDump(this, curAssertion->op2.vn); + } else { unsigned lclNum = curAssertion->op1.lcl.lclNum; assert(lclNum < lvaCount); @@ -679,7 +733,13 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, unsigned assertion if (assertionIndex > 0) { - printf(" :: index=#%02u, mask=%s", assertionIndex, genES2str(genCSEnum2bit(assertionIndex))); + printf("index=#%02u, mask=", assertionIndex); + + // This is an hack to reuse a known empty set in order to display + // a single bit mask. + BitVecOps::AddElemD(apTraits, apEmpty, assertionIndex - 1); + printf("%s", BitVecOps::ToString(apTraits, apEmpty)); + BitVecOps::RemoveElemD(apTraits, apEmpty, assertionIndex - 1); } printf("\n"); } @@ -691,12 +751,12 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, unsigned assertion * is NO_ASSERTION_INDEX and "optAssertionCount" is the last valid index. * */ -Compiler::AssertionDsc * Compiler::optGetAssertion(unsigned assertIndex) +Compiler::AssertionDsc * Compiler::optGetAssertion(AssertionIndex assertIndex) { assert(NO_ASSERTION_INDEX == 0); noway_assert(assertIndex != NO_ASSERTION_INDEX); - if (assertIndex > MAX_ASSERTION_CNT) + if (assertIndex > optMaxAssertionCount) { return nullptr; } @@ -709,7 +769,7 @@ Compiler::AssertionDsc * Compiler::optGetAssertion(unsigned assertIndex) * if they don't care about it. Refer overloaded method optCreateAssertion. * */ -unsigned Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind) +Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind) { AssertionDsc assertionDsc; return optCreateAssertion(op1, op2, assertionKind, &assertionDsc); @@ -731,7 +791,7 @@ unsigned Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAsserti * NO_ASSERTION_INDEX and we could not create the assertion. * */ -unsigned Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, +Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind, AssertionDsc* assertion) { @@ -846,13 +906,6 @@ unsigned Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, goto DONE_ASSERTION; // Don't make an assertion } -#ifdef DEBUG - if (lclVar->lvDblWasInt) - { - goto DONE_ASSERTION; // Don't make an assertion - } -#endif - if (haveArgs) { // @@ -1290,18 +1343,40 @@ bool Compiler::optIsTreeKnownIntValue(bool vnBased, GenTreePtr tree, ssize_t* pC return false; } +#ifdef DEBUG +/***************************************************************************** + * + * Print the assertions related to a VN for all VNs. 
+ * + */ +void Compiler::optPrintVnAssertionMapping() +{ + printf("\nVN Assertion Mapping\n"); + printf("---------------------\n"); + for (ValueNumToAssertsMap::KeyIterator ki = optValueNumToAsserts->Begin(); + !ki.Equal(optValueNumToAsserts->End()); ++ki) + { + printf("(%d => ", ki.Get()); + printf("%s)\n", BitVecOps::ToString(apTraits, ki.GetValue())); + } +} +#endif + /***************************************************************************** * * Maintain a map "optValueNumToAsserts" i.e., vn -> to set of assertions * about that VN. Given "assertions" about a "vn" add it to the previously * mapped assertions about that "vn." */ -void Compiler::optAddVnAssertionMapping(ValueNum vn, const EXPSET_TP& assertions) +void Compiler::optAddVnAssertionMapping(ValueNum vn, AssertionIndex index) { - EXPSET_TP cur = 0; - optValueNumToAsserts->Lookup(vn, &cur); - cur |= assertions; - optValueNumToAsserts->Set(vn, cur); + ASSERT_TP cur; + if (!optValueNumToAsserts->Lookup(vn, &cur)) + { + cur = optNewEmptyAssertSet(); + optValueNumToAsserts->Set(vn, cur); + } + BitVecOps::AddElemD(apTraits, cur, index - 1); } /***************************************************************************** @@ -1342,7 +1417,7 @@ bool Compiler::optAssertionVnInvolvesNan(AssertionDsc* assertion) * we use to refer to this element. * If we need to add to the table and the table is full return the value zero */ -unsigned Compiler::optAddAssertion(AssertionDsc* newAssertion) +Compiler::AssertionIndex Compiler::optAddAssertion(AssertionDsc* newAssertion) { noway_assert(newAssertion->assertionKind != OAK_INVALID); @@ -1354,9 +1429,8 @@ unsigned Compiler::optAddAssertion(AssertionDsc* newAssertion) return NO_ASSERTION_INDEX; } - - // Check if exists already, so we can skip adding new one. - for (unsigned index = 1; index <= optAssertionCount; index++) + // Check if exists already, so we can skip adding new one. Search backwards. + for (AssertionIndex index = optAssertionCount; index >= 1; index--) { AssertionDsc* curAssertion = optGetAssertion(index); if (curAssertion->Equals(newAssertion, !optLocalAssertionProp)) @@ -1366,7 +1440,7 @@ unsigned Compiler::optAddAssertion(AssertionDsc* newAssertion) } // Check if we are within max count. - if (optAssertionCount >= MAX_ASSERTION_CNT) + if (optAssertionCount >= optMaxAssertionCount) { return NO_ASSERTION_INDEX; } @@ -1385,25 +1459,26 @@ unsigned Compiler::optAddAssertion(AssertionDsc* newAssertion) #endif // DEBUG // Assertion mask bits are [index + 1]. - EXPSET_TP assertionBit = optGetAssertionBit(optAssertionCount); if (optLocalAssertionProp) { assert(newAssertion->op1.kind == O1K_LCLVAR); - // Mark the variables this index depends on - lvaTable[newAssertion->op1.lcl.lclNum].lvAssertionDep |= assertionBit; + // Mark the variables this index depends on + unsigned lclNum = newAssertion->op1.lcl.lclNum; + BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), optAssertionCount - 1); if (newAssertion->op2.kind == O2K_LCLVAR_COPY) { - lvaTable[newAssertion->op2.lcl.lclNum].lvAssertionDep |= assertionBit; + lclNum = newAssertion->op2.lcl.lclNum; + BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), optAssertionCount - 1); } } else // If global assertion prop, then add it to the dependents map. 
{ - optAddVnAssertionMapping(newAssertion->op1.vn, assertionBit); + optAddVnAssertionMapping(newAssertion->op1.vn, optAssertionCount); if (newAssertion->op2.kind == O2K_LCLVAR_COPY) { - optAddVnAssertionMapping(newAssertion->op2.vn, assertionBit); + optAddVnAssertionMapping(newAssertion->op2.vn, optAssertionCount); } } @@ -1421,11 +1496,11 @@ unsigned Compiler::optAddAssertion(AssertionDsc* newAssertion) * If "index" is between 1 and optAssertionCount, then verify the assertion * desc corresponding to "index." */ -void Compiler::optDebugCheckAssertions(unsigned index) +void Compiler::optDebugCheckAssertions(AssertionIndex index) { - unsigned start = (index == NO_ASSERTION_INDEX) ? 1 : index; - unsigned end = (index == NO_ASSERTION_INDEX) ? optAssertionCount : index; - for (unsigned ind = start; ind <= end; ++ind) + AssertionIndex start = (index == NO_ASSERTION_INDEX) ? 1 : index; + AssertionIndex end = (index == NO_ASSERTION_INDEX) ? optAssertionCount : index; + for (AssertionIndex ind = start; ind <= end; ++ind) { AssertionDsc* assertion = optGetAssertion(ind); switch (assertion->op2.kind) @@ -1441,6 +1516,7 @@ void Compiler::optDebugCheckAssertions(unsigned index) break; case O1K_ARRLEN_OPER_BND: case O1K_ARRLEN_LOOP_BND: + case O1K_CONSTANT_LOOP_BND: assert(!optLocalAssertionProp); break; default: @@ -1466,10 +1542,17 @@ void Compiler::optDebugCheckAssertions(unsigned index) * */ -void Compiler::optCreateComplementaryAssertion(const AssertionDsc& candidateAssertion, GenTreePtr op1, GenTreePtr op2) +void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, GenTreePtr op1, GenTreePtr op2) { + if (assertionIndex == NO_ASSERTION_INDEX) + { + return; + } + + AssertionDsc& candidateAssertion = *optGetAssertion(assertionIndex); if (candidateAssertion.op1.kind == O1K_ARRLEN_OPER_BND || - candidateAssertion.op1.kind == O1K_ARRLEN_LOOP_BND) + candidateAssertion.op1.kind == O1K_ARRLEN_LOOP_BND || + candidateAssertion.op1.kind == O1K_CONSTANT_LOOP_BND) { AssertionDsc dsc = candidateAssertion; dsc.assertionKind = dsc.assertionKind == OAK_EQUAL ? OAK_NOT_EQUAL : OAK_EQUAL; @@ -1479,11 +1562,13 @@ void Compiler::optCreateComplementaryAssertion(const AssertionDsc& candidateAsse if (candidateAssertion.assertionKind == OAK_EQUAL) { - optCreateAssertion(op1, op2, OAK_NOT_EQUAL); + AssertionIndex index = optCreateAssertion(op1, op2, OAK_NOT_EQUAL); + optMapComplementary(index, assertionIndex); } else if (candidateAssertion.assertionKind == OAK_NOT_EQUAL) { - optCreateAssertion(op1, op2, OAK_EQUAL); + AssertionIndex index = optCreateAssertion(op1, op2, OAK_EQUAL); + optMapComplementary(index, assertionIndex); } // Are we making a subtype or exact type assertion? @@ -1505,20 +1590,20 @@ void Compiler::optCreateComplementaryAssertion(const AssertionDsc& candidateAsse * for the operands. */ -unsigned Compiler::optCreateJtrueAssertions(GenTreePtr op1, GenTreePtr op2, Compiler::optAssertionKind assertionKind) +Compiler::AssertionIndex Compiler::optCreateJtrueAssertions(GenTreePtr op1, GenTreePtr op2, Compiler::optAssertionKind assertionKind) { - AssertionDsc candidateAssertion; - unsigned assertionIndex = optCreateAssertion(op1, op2, assertionKind, &candidateAssertion); + AssertionDsc candidateAssertion; + AssertionIndex assertionIndex = optCreateAssertion(op1, op2, assertionKind, &candidateAssertion); // Don't bother if we don't have an assertion on the JTrue False path. 
Current implementation // allows for a complementary only if there is an assertion on the False path (tree->HasAssertion()). if (assertionIndex != NO_ASSERTION_INDEX) { - optCreateComplementaryAssertion(candidateAssertion, op1, op2); + optCreateComplementaryAssertion(assertionIndex, op1, op2); } return assertionIndex; } -unsigned Compiler::optCreateJTrueArrayAssertion(GenTreePtr tree) +Compiler::AssertionIndex Compiler::optCreateJTrueBoundsAssertion(GenTreePtr tree) { GenTreePtr relop = tree->gtGetOp1(); if ((relop->OperKind() & GTK_RELOP) == 0) @@ -1544,8 +1629,8 @@ unsigned Compiler::optCreateJTrueArrayAssertion(GenTreePtr tree) dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet()); dsc.op2.u1.iconVal = 0; dsc.op2.u1.iconFlags = 0; - unsigned index = optAddAssertion(&dsc); - optCreateComplementaryAssertion(dsc, nullptr, nullptr); + AssertionIndex index = optAddAssertion(&dsc); + optCreateComplementaryAssertion(index, nullptr, nullptr); return index; } // Cases where op1 holds the condition array length and op2 is 0. @@ -1563,8 +1648,8 @@ unsigned Compiler::optCreateJTrueArrayAssertion(GenTreePtr tree) dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet()); dsc.op2.u1.iconVal = 0; dsc.op2.u1.iconFlags = 0; - unsigned index = optAddAssertion(&dsc); - optCreateComplementaryAssertion(dsc, nullptr, nullptr); + AssertionIndex index = optAddAssertion(&dsc); + optCreateComplementaryAssertion(index, nullptr, nullptr); return index; } // Cases where op1 holds the lhs of the condition op2 holds rhs. @@ -1580,10 +1665,47 @@ unsigned Compiler::optCreateJTrueArrayAssertion(GenTreePtr tree) dsc.op2.vn = vnStore->VNZeroForType(TYP_INT); dsc.op2.u1.iconVal = 0; dsc.op2.u1.iconFlags = 0; - unsigned index = optAddAssertion(&dsc); - optCreateComplementaryAssertion(dsc, nullptr, nullptr); + AssertionIndex index = optAddAssertion(&dsc); + optCreateComplementaryAssertion(index, nullptr, nullptr); + return index; + } + // Cases where op1 holds the condition bound check and op2 is 0. + // Loop condition like: "i < 100 == 0" + // Assertion: "i < 100 == false" + else if (vnStore->IsVNConstantBound(vn) && + (op2->gtVNPair.GetConservative() == vnStore->VNZeroForType(op2->TypeGet())) && + (relop->gtOper == GT_EQ || relop->gtOper == GT_NE)) + { + AssertionDsc dsc; + dsc.assertionKind = relop->gtOper == GT_EQ ? OAK_EQUAL : OAK_NOT_EQUAL; + dsc.op1.kind = O1K_CONSTANT_LOOP_BND; + dsc.op1.vn = vn; + dsc.op2.kind = O2K_CONST_INT; + dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet()); + dsc.op2.u1.iconVal = 0; + dsc.op2.u1.iconFlags = 0; + AssertionIndex index = optAddAssertion(&dsc); + optCreateComplementaryAssertion(index, nullptr, nullptr); + return index; + } + // Cases where op1 holds the lhs of the condition op2 holds rhs. + // Loop condition like "i < 100" + // Assertion: "i < 100 != 0" + else if (vnStore->IsVNConstantBound(relop->gtVNPair.GetConservative())) + { + AssertionDsc dsc; + dsc.assertionKind = OAK_NOT_EQUAL; + dsc.op1.kind = O1K_CONSTANT_LOOP_BND; + dsc.op1.vn = relop->gtVNPair.GetConservative(); + dsc.op2.kind = O2K_CONST_INT; + dsc.op2.vn = vnStore->VNZeroForType(TYP_INT); + dsc.op2.u1.iconVal = 0; + dsc.op2.u1.iconFlags = 0; + AssertionIndex index = optAddAssertion(&dsc); + optCreateComplementaryAssertion(index, nullptr, nullptr); return index; } + return NO_ASSERTION_INDEX; } @@ -1591,7 +1713,7 @@ unsigned Compiler::optCreateJTrueArrayAssertion(GenTreePtr tree) * * Compute assertions for the JTrue node. 
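To make the new O1K_CONSTANT_LOOP_BND cases concrete, this is the kind of source loop they target, with the assertions from the comments above restated informally. The function below is only an example input, not code from this change:

    // Example input: the loop test is a relop against a constant bound.
    int SumFirstHundred(const int* a)
    {
        int sum = 0;
        for (int i = 0; i < 100; i++)       // JTRUE over the relop "i < 100"
        {
            // On the edge where the condition holds, the assertion "(i < 100) != 0"
            // becomes available; the complementary edge gets "(i < 100) == 0".
            sum += a[i];                    // later phases can exploit the bound on i
        }
        return sum;
    }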
*/ -unsigned Compiler::optAssertionGenJtrue(GenTreePtr tree) +Compiler::AssertionIndex Compiler::optAssertionGenJtrue(GenTreePtr tree) { // Only create assertions for JTRUE when we are in the global phase if (optLocalAssertionProp) @@ -1610,7 +1732,7 @@ unsigned Compiler::optAssertionGenJtrue(GenTreePtr tree) GenTreePtr op1 = relop->gtOp.gtOp1; GenTreePtr op2 = relop->gtOp.gtOp2; - unsigned index = optCreateJTrueArrayAssertion(tree); + AssertionIndex index = optCreateJTrueBoundsAssertion(tree); if (index != NO_ASSERTION_INDEX) { return index; @@ -1695,7 +1817,7 @@ unsigned Compiler::optAssertionGenJtrue(GenTreePtr tree) * from all of the constituent phi operands. * */ -unsigned Compiler::optAssertionGenPhiDefn(GenTreePtr tree) +Compiler::AssertionIndex Compiler::optAssertionGenPhiDefn(GenTreePtr tree) { if (!tree->IsPhiDefn()) { @@ -1747,7 +1869,7 @@ void Compiler::optAssertionGen(GenTreePtr tree) // For most of the assertions that we create below // the assertion is true after the tree is processed bool assertionProven = true; - unsigned assertionIndex = NO_ASSERTION_INDEX; + AssertionIndex assertionIndex = NO_ASSERTION_INDEX; switch (tree->gtOper) { case GT_ASG: @@ -1833,10 +1955,25 @@ void Compiler::optAssertionGen(GenTreePtr tree) /***************************************************************************** * + * Maps a complementary assertion to its original assertion so it can be + * retrieved faster. + */ +void Compiler::optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index) +{ + if (assertionIndex == NO_ASSERTION_INDEX || index == NO_ASSERTION_INDEX) + { + return; + } + optComplementaryAssertionMap[assertionIndex] = index; + optComplementaryAssertionMap[index] = assertionIndex; +} + +/***************************************************************************** + * * Given an assertion index, return the assertion index of the complementary * assertion or 0 if one does not exist. */ -unsigned Compiler::optFindComplementary(unsigned assertIndex) +Compiler::AssertionIndex Compiler::optFindComplementary(AssertionIndex assertIndex) { if (assertIndex == NO_ASSERTION_INDEX) { @@ -1850,13 +1987,20 @@ unsigned Compiler::optFindComplementary(unsigned assertIndex) return NO_ASSERTION_INDEX; } + AssertionIndex index = optComplementaryAssertionMap[assertIndex]; + if (index != NO_ASSERTION_INDEX && index <= optAssertionCount) + { + return index; + } + optAssertionKind complementaryAssertionKind = (inputAssertion->assertionKind == OAK_EQUAL) ? OAK_NOT_EQUAL : OAK_EQUAL; - for (unsigned index = 1; index <= optAssertionCount; ++index) + for (AssertionIndex index = 1; index <= optAssertionCount; ++index) { // Make sure assertion kinds are complementary and op1, op2 kinds match. AssertionDsc* curAssertion = optGetAssertion(index); if (curAssertion->Complementary(inputAssertion, !optLocalAssertionProp)) { + optMapComplementary(assertIndex, index); return index; } } @@ -1871,20 +2015,17 @@ unsigned Compiler::optFindComplementary(unsigned assertIndex) * if one such assertion could not be found in "assertions." 
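optFindComplementary above now consults optComplementaryAssertionMap before falling back to the linear table scan, and optMapComplementary records matches in both directions. A small illustrative sketch of that memoization pattern follows; the type and names are made up for the example:

    // Illustrative symmetric cache in front of an O(n) table scan.
    #include <vector>

    struct ComplementaryCache
    {
        std::vector<unsigned> map;               // map[i] == 0 means "not cached yet"

        explicit ComplementaryCache(size_t count) : map(count + 1, 0) {}

        void Record(unsigned a, unsigned b)      // cf. optMapComplementary
        {
            if (a == 0 || b == 0)
            {
                return;
            }
            map[a] = b;                          // record both directions so either
            map[b] = a;                          // assertion finds its complement in O(1)
        }

        unsigned Lookup(unsigned a) const        // fast path of optFindComplementary
        {
            return (a < map.size()) ? map[a] : 0;
        }
    };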
*/ -unsigned Compiler::optAssertionIsSubrange(GenTreePtr tree, var_types toType, EXPSET_TP assertions) +Compiler::AssertionIndex Compiler::optAssertionIsSubrange(GenTreePtr tree, var_types toType, ASSERT_VALARG_TP assertions) { - if (!optLocalAssertionProp && assertions == 0) + if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions)) { return NO_ASSERTION_INDEX; } - EXPSET_TP mask = 1; - for (unsigned index = 1; index <= optAssertionCount; index++, mask <<= 1) + for (AssertionIndex index = 1; index <= optAssertionCount; index++) { - noway_assert(mask == optGetAssertionBit(index)); - AssertionDsc* curAssertion = optGetAssertion(index); - if ((optLocalAssertionProp || (assertions & mask)) && // either local prop or use propagated assertions + if ((optLocalAssertionProp || BitVecOps::IsMember(apTraits, assertions, index - 1)) && // either local prop or use propagated assertions (curAssertion->assertionKind == OAK_SUBRANGE) && (curAssertion->op1.kind == O1K_LCLVAR)) { @@ -1939,18 +2080,15 @@ unsigned Compiler::optAssertionIsSubrange(GenTreePtr tree, var_types toType, EXP * could not be found, then it returns NO_ASSERTION_INDEX. * */ -unsigned Compiler::optAssertionIsSubtype(GenTreePtr tree, GenTreePtr methodTableArg, EXPSET_TP assertions) +Compiler::AssertionIndex Compiler::optAssertionIsSubtype(GenTreePtr tree, GenTreePtr methodTableArg, ASSERT_VALARG_TP assertions) { - if (!optLocalAssertionProp && assertions == 0) + if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions)) { return NO_ASSERTION_INDEX; } - EXPSET_TP mask = 1; - for (unsigned index = 1; index <= optAssertionCount; index++, mask <<= 1) + for (AssertionIndex index = 1; index <= optAssertionCount; index++) { - assert(mask == optGetAssertionBit(index)); - - if (!optLocalAssertionProp && !(assertions & mask)) + if (!optLocalAssertionProp && !BitVecOps::IsMember(apTraits, assertions, index - 1)) { continue; } @@ -1998,54 +2136,78 @@ unsigned Compiler::optAssertionIsSubtype(GenTreePtr tree, GenTreePtr methodTable return NO_ASSERTION_INDEX; } -/******************************************************************************************************* - * - * Perform value numbering based constant propagation on the tree node. Given a "tree" that is identified - * by value numbering system as having a "constant vn," then substitute the given node by the constant - * of the correct type. - * - */ -GenTreePtr Compiler::optVnConstantAssertionProp(const GenTreePtr tree, const GenTreePtr stmt) +//------------------------------------------------------------------------------ +// optVNConstantPropOnTree: Substitutes tree with an evaluated constant while +// managing ref-counts and side-effects. +// +// Arguments: +// block - The block containing the tree. +// stmt - The statement in the block containing the tree. +// tree - The tree node whose value is known at compile time. +// The tree should have a constant value number. +// +// Return Value: +// Returns a potentially new or a transformed tree node. +// Returns nullptr when no transformation is possible. +// +// Description: +// Transforms a tree node if its result evaluates to a constant. The +// transformation can be a "ChangeOper" to a constant or a new constant node +// with extracted side-effects. +// +// Before replacing or substituting the "tree" with a constant, extracts any +// side effects from the "tree" and creates a comma separated side effect list +// and then appends the transformed node at the end of the list. 
+// This comma separated list is then returned. +// +// For JTrue nodes, side effects are not put into a comma separated list. If +// the relop will evaluate to "true" or "false" statically, then the side-effects +// will be put into new statements, presuming the JTrue will be folded away. +// +// The ref-counts of any variables in the tree being replaced, will be +// appropriately decremented. The ref-counts of variables in the side-effect +// nodes will be retained. +// +GenTreePtr Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree) { - assert(!optLocalAssertionProp); - - if (!vnStore->IsVNConstant(tree->gtVNPair.GetConservative())) + if (tree->OperGet() == GT_JTRUE) { - return nullptr; + // Treat JTRUE separately to extract side effects into respective statements rather + // than using a COMMA separated op1. + return optVNConstantPropOnJTrue(block, stmt, tree); } - - // Don't propagate floating-point constants into a TYP_STRUCT LclVar - // This can occur for HFA return values (see hfa_sf3E_r.exe) - // - if (tree->TypeGet() == TYP_STRUCT) + // If relop is part of JTRUE, this should be optimized as part of the parent JTRUE. + // Or if relop is part of QMARK or anything else, we simply bail here. + else if (tree->OperIsCompare() && (tree->gtFlags & GTF_RELOP_JMP_USED)) { return nullptr; } -#ifdef DEBUG - if (verbose) + ValueNum vnCns = tree->gtVNPair.GetConservative(); + ValueNum vnLib = tree->gtVNPair.GetLiberal(); + + // Check if node evaluates to a constant. + if (!vnStore->IsVNConstant(vnCns)) { - printf("\nVN based constant assertion prop in BB%02u:\n", compCurBB->bbNum); - printf("Old Tree: "); - gtDispTree(tree, 0, nullptr, true); + return nullptr; } -#endif - - ValueNum vnCns = tree->gtVNPair.GetConservative(); // Save the ValueNum - GenTreePtr newTree = tree; - switch (vnStore->TypeOfVN(tree->gtVNPair.GetConservative())) + GenTreePtr newTree = tree; + GenTreePtr sideEffList = nullptr; + switch (vnStore->TypeOfVN(vnCns)) { case TYP_FLOAT: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_DBL); - newTree->gtDblCon.gtDconVal = vnStore->ConstantValue<float>(vnCns); + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_DBL); + tree->gtDblCon.gtDconVal = vnStore->ConstantValue<float>(vnCns); + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_DOUBLE: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_DBL); - newTree->gtDblCon.gtDconVal = vnStore->ConstantValue<double>(vnCns); + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_DBL); + tree->gtDblCon.gtDconVal = vnStore->ConstantValue<double>(vnCns); + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_LONG: @@ -2054,8 +2216,9 @@ GenTreePtr Compiler::optVnConstantAssertionProp(const GenTreePtr tree, const Gen #ifdef _TARGET_64BIT_ if (vnStore->IsVNHandle(vnCns)) { - lvaRecursiveDecRefCounts(newTree); newTree = gtNewIconHandleNode(value, vnStore->GetHandleFlags(vnCns)); + newTree->gtVNPair = ValueNumPair(vnLib, vnCns); + newTree = optPrepareTreeForReplacement(tree, newTree); } else #endif @@ -2063,27 +2226,31 @@ GenTreePtr Compiler::optVnConstantAssertionProp(const GenTreePtr tree, const Gen switch (tree->TypeGet()) { case TYP_INT: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_INT); - newTree->gtIntCon.gtIconVal = (int) value; + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_INT); + tree->gtIntCon.gtIconVal = (int) 
value; + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_LONG: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_NATIVELONG); - newTree->gtIntConCommon.SetLngValue(value); + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_NATIVELONG); + tree->gtIntConCommon.SetLngValue(value); + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_FLOAT: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_DBL); - newTree->gtDblCon.gtDconVal = (float) value; + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_DBL); + tree->gtDblCon.gtDconVal = (float) value; + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_DOUBLE: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_DBL); - newTree->gtDblCon.gtDconVal = (double) value; + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_DBL); + tree->gtDblCon.gtDconVal = (double) value; + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; default: @@ -2098,10 +2265,11 @@ GenTreePtr Compiler::optVnConstantAssertionProp(const GenTreePtr tree, const Gen return nullptr; assert(vnStore->ConstantValue<size_t>(vnCns) == 0); - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_INT); - newTree->gtIntCon.gtIconVal = 0; - newTree->ClearIconHandleMask(); + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_INT); + tree->gtIntCon.gtIconVal = 0; + tree->ClearIconHandleMask(); + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_INT: @@ -2110,8 +2278,9 @@ GenTreePtr Compiler::optVnConstantAssertionProp(const GenTreePtr tree, const Gen #ifndef _TARGET_64BIT_ if (vnStore->IsVNHandle(vnCns)) { - lvaRecursiveDecRefCounts(newTree); newTree = gtNewIconHandleNode(value, vnStore->GetHandleFlags(vnCns)); + newTree->gtVNPair = ValueNumPair(vnLib, vnCns); + newTree = optPrepareTreeForReplacement(tree, newTree); } else #endif @@ -2120,28 +2289,32 @@ GenTreePtr Compiler::optVnConstantAssertionProp(const GenTreePtr tree, const Gen { case TYP_REF: case TYP_INT: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_INT); - newTree->gtIntCon.gtIconVal = value; - newTree->ClearIconHandleMask(); + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_INT); + tree->gtIntCon.gtIconVal = value; + tree->ClearIconHandleMask(); + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_LONG: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_NATIVELONG); - newTree->gtIntConCommon.SetLngValue((INT64) value); + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_NATIVELONG); + tree->gtIntConCommon.SetLngValue((INT64) value); + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_FLOAT: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_DBL); - newTree->gtDblCon.gtDconVal = (float) value; + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_DBL); + tree->gtDblCon.gtDconVal = (float) value; + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; case TYP_DOUBLE: - lvaRecursiveDecRefCounts(newTree); - newTree->ChangeOperConst(GT_CNS_DBL); - newTree->gtDblCon.gtDconVal = (double) value; + newTree = optPrepareTreeForReplacement(tree, tree); + tree->ChangeOperConst(GT_CNS_DBL); + tree->gtDblCon.gtDconVal = (double) value; + tree->gtVNPair = ValueNumPair(vnLib, vnCns); break; default: @@ -2154,17 +2327,7 @@ 
GenTreePtr Compiler::optVnConstantAssertionProp(const GenTreePtr tree, const Gen default: return nullptr; } - - newTree->gtVNPair.SetBoth(vnCns); // Preserve the ValueNumPair, as ChangeOperConst/SetOper will clear it. - -#ifdef DEBUG - if (verbose) - { - printf("New Tree: "); - gtDispTree(newTree, 0, nullptr, true); - } -#endif - return optAssertionProp_Update(newTree, tree, stmt); + return newTree; } /******************************************************************************************************* @@ -2172,7 +2335,7 @@ GenTreePtr Compiler::optVnConstantAssertionProp(const GenTreePtr tree, const Gen * Perform constant propagation on a tree given the "curAssertion" is true at the point of the "tree." * */ -GenTreePtr Compiler::optConstantAssertionProp(AssertionDsc* curAssertion, GenTreePtr tree, GenTreePtr stmt DEBUGARG(unsigned index)) +GenTreePtr Compiler::optConstantAssertionProp(AssertionDsc* curAssertion, GenTreePtr tree, GenTreePtr stmt DEBUGARG(AssertionIndex index)) { unsigned lclNum = tree->gtLclVarCommon.gtLclNum; @@ -2337,7 +2500,7 @@ bool Compiler::optAssertionProp_LclVarTypeCheck(GenTreePtr tree, LclVarDsc* lclV * the "curAssertion." * */ -GenTreePtr Compiler::optCopyAssertionProp(AssertionDsc* curAssertion, GenTreePtr tree, GenTreePtr stmt DEBUGARG(unsigned index)) +GenTreePtr Compiler::optCopyAssertionProp(AssertionDsc* curAssertion, GenTreePtr tree, GenTreePtr stmt DEBUGARG(AssertionIndex index)) { const AssertionDsc::AssertionDscOp1& op1 = curAssertion->op1; const AssertionDsc::AssertionDscOp2& op2 = curAssertion->op2; @@ -2413,7 +2576,7 @@ GenTreePtr Compiler::optCopyAssertionProp(AssertionDsc* curAssertion, GenTreePtr * be nullptr. Returns the modified tree, or nullptr if no assertion prop took place. */ -GenTreePtr Compiler::optAssertionProp_LclVar(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { assert(tree->gtOper == GT_LCL_VAR); // If we have a var definition then bail or @@ -2423,40 +2586,20 @@ GenTreePtr Compiler::optAssertionProp_LclVar(EXPSET_TP assertions, const GenTree { return nullptr; } - - // If global assertion prop, perform value numbering based constant prop. - if (!optLocalAssertionProp) - { -#if FEATURE_ANYCSE - // Don't perform constant prop for CSE LclVars - if (!lclNumIsCSE(tree->AsLclVarCommon()->GetLclNum())) -#endif - { - GenTreePtr newTree = optVnConstantAssertionProp(tree, stmt); - if (newTree != nullptr) - { - return newTree; - } - } - } - - // Check each assertion to see if it can be applied here. - EXPSET_TP mask = 1; - for (unsigned index = 1; index <= optAssertionCount; index++, mask <<= 1) + + BitVecOps::Iter iter(apTraits, assertions); + unsigned index = 0; + while (iter.NextElem(apTraits, &index)) { + index++; + if (index > optAssertionCount) break; // See if the variable is equal to a constant or another variable. - AssertionDsc* curAssertion = optGetAssertion(index); + AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index); if (curAssertion->assertionKind != OAK_EQUAL || curAssertion->op1.kind != O1K_LCLVAR) { continue; } - // If the current assertion is not in the set of assertions, continue. - if (!(mask & assertions)) - { - continue; - } - // Copy prop. if (curAssertion->op2.kind == O2K_LCLVAR_COPY) { @@ -2467,7 +2610,7 @@ GenTreePtr Compiler::optAssertionProp_LclVar(EXPSET_TP assertions, const GenTree if (optLocalAssertionProp) { // Perform copy assertion prop. 
- GenTreePtr newTree = optCopyAssertionProp(curAssertion, tree, stmt DEBUGARG(index)); + GenTreePtr newTree = optCopyAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index)); if (newTree == nullptr) { // Skip and try next assertion. @@ -2487,7 +2630,7 @@ GenTreePtr Compiler::optAssertionProp_LclVar(EXPSET_TP assertions, const GenTree // If local assertion prop just, perform constant prop. if (optLocalAssertionProp) { - return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(index)); + return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index)); } // If global assertion, perform constant propagation only if the VN's match and the lcl is non-CSE. else if (curAssertion->op1.vn == tree->gtVNPair.GetConservative()) @@ -2497,7 +2640,7 @@ GenTreePtr Compiler::optAssertionProp_LclVar(EXPSET_TP assertions, const GenTree if (!lclNumIsCSE(tree->AsLclVarCommon()->GetLclNum())) #endif { - return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(index)); + return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index)); } } } @@ -2512,24 +2655,20 @@ GenTreePtr Compiler::optAssertionProp_LclVar(EXPSET_TP assertions, const GenTree * op1Kind and lclNum, op2Kind and the constant value and is either equal or * not equal assertion. */ -unsigned Compiler::optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, - ssize_t cnsVal, EXPSET_TP assertions) +Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, + ssize_t cnsVal, ASSERT_VALARG_TP assertions) { noway_assert((op1Kind == O1K_LCLVAR) || (op1Kind == O1K_EXACT_TYPE) || (op1Kind == O1K_SUBTYPE)); noway_assert((op2Kind == O2K_CONST_INT) || (op2Kind == O2K_IND_CNS_INT)); - if (!optLocalAssertionProp && assertions == 0) + if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions)) { return NO_ASSERTION_INDEX; } - EXPSET_TP mask = 1; - - for (unsigned index = 1; index <= optAssertionCount; ++index, mask <<= 1) + for (AssertionIndex index = 1; index <= optAssertionCount; ++index) { - noway_assert(mask == optGetAssertionBit(index)); - AssertionDsc* curAssertion = optGetAssertion(index); - if (optLocalAssertionProp || (assertions & mask)) + if (optLocalAssertionProp || BitVecOps::IsMember(apTraits, assertions, index - 1)) { if ((curAssertion->assertionKind != OAK_EQUAL) && (curAssertion->assertionKind != OAK_NOT_EQUAL)) { @@ -2559,27 +2698,28 @@ unsigned Compiler::optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsign * "op1" == "op2" or "op1" != "op2." Does a value number based comparison. 
* */ -unsigned Compiler::optGlobalAssertionIsEqualOrNotEqual(EXPSET_TP assertions, GenTreePtr op1, GenTreePtr op2) +Compiler::AssertionIndex Compiler::optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions, GenTreePtr op1, GenTreePtr op2) { - if (assertions == 0) + if (BitVecOps::IsEmpty(apTraits, assertions)) { return NO_ASSERTION_INDEX; } - unsigned index = 1; - for (EXPSET_TP mask = 1; index <= optAssertionCount; index++, mask <<= 1) + BitVecOps::Iter iter(apTraits, assertions); + unsigned index = 0; + while (iter.NextElem(apTraits, &index)) { - AssertionDsc* curAssertion = optGetAssertion(index); + index++; + if (index > optAssertionCount) break; + AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index); if ((curAssertion->assertionKind != OAK_EQUAL && curAssertion->assertionKind != OAK_NOT_EQUAL)) { continue; } - if (assertions & mask) + if (curAssertion->op1.vn == op1->gtVNPair.GetConservative() && + curAssertion->op2.vn == op2->gtVNPair.GetConservative()) { - if (curAssertion->op1.vn == op1->gtVNPair.GetConservative() && curAssertion->op2.vn == op2->gtVNPair.GetConservative()) - { - return index; - } + return (AssertionIndex)index; } } return NO_ASSERTION_INDEX; @@ -2594,7 +2734,7 @@ unsigned Compiler::optGlobalAssertionIsEqualOrNotEqual(EXPSET_TP assertions, Gen * Returns the modified tree, or nullptr if no assertion prop took place */ -GenTreePtr Compiler::optAssertionProp_RelOp(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { assert(tree->OperKind() & GTK_RELOP); @@ -2624,7 +2764,7 @@ GenTreePtr Compiler::optAssertionProp_RelOp(EXPSET_TP assertions, const GenTreeP * perform Value numbering based relop assertion propagation on the tree. * */ -GenTreePtr Compiler::optAssertionPropGlobal_RelOp(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE); @@ -2632,69 +2772,13 @@ GenTreePtr Compiler::optAssertionPropGlobal_RelOp(EXPSET_TP assertions, const Ge GenTreePtr op1 = tree->gtOp.gtOp1; GenTreePtr op2 = tree->gtOp.gtOp2; - // If op2 is null and the VN for op1 is known to be non-null then transform the tree. - if ((op2->gtVNPair.GetConservative() == ValueNumStore::VNForNull()) && - vnStore->IsKnownNonNull(op1->gtVNPair.GetConservative())) - { -#ifdef DEBUG - if (verbose) - { - printf("\nVN relop based non-null assertion prop for op1 in BB%02u:\n", compCurBB->bbNum); - gtDispTree(op1, 0, nullptr, true); - } -#endif - op1->ChangeOperConst(GT_CNS_INT); - op1->gtIntCon.gtIconVal = 0; - op1->gtVNPair.SetBoth(ValueNumStore::VNForNull()); - - gtReverseCond(tree); - - newTree = fgMorphTree(tree); - -#ifdef DEBUG - if (verbose) - { - gtDispTree(newTree, 0, nullptr, true); - } -#endif - return optAssertionProp_Update(newTree, tree, stmt); - } - - // If op1 is null and the VN for op2 is known to be non-null then transform the tree. 
- if ((op1->gtVNPair.GetConservative() == ValueNumStore::VNForNull()) && - vnStore->IsKnownNonNull(op2->gtVNPair.GetConservative())) - { -#ifdef DEBUG - if (verbose) - { - printf("\nVN relop based non-null assertion prop for op2 in BB%02u:\n", compCurBB->bbNum); - gtDispTree(op2, 0, nullptr, true); - } -#endif - op2->ChangeOperConst(GT_CNS_INT); - op2->gtIntCon.gtIconVal = 0; - op1->gtVNPair.SetBoth(ValueNumStore::VNForNull()); - - gtReverseCond(tree); - - newTree = fgMorphTree(tree); - -#ifdef DEBUG - if (verbose) - { - gtDispTree(newTree, 0, nullptr, true); - } -#endif - return optAssertionProp_Update(newTree, tree, stmt); - } - if (op1->gtOper != GT_LCL_VAR) { return nullptr; } // Find an equal or not equal assertion involving "op1" and "op2". - unsigned index = optGlobalAssertionIsEqualOrNotEqual(assertions, op1, op2); + AssertionIndex index = optGlobalAssertionIsEqualOrNotEqual(assertions, op1, op2); if (index == NO_ASSERTION_INDEX) { return nullptr; @@ -2862,7 +2946,7 @@ GenTreePtr Compiler::optAssertionPropGlobal_RelOp(EXPSET_TP assertions, const Ge * perform local variable name based relop assertion propagation on the tree. * */ -GenTreePtr Compiler::optAssertionPropLocal_RelOp(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE); @@ -2891,8 +2975,8 @@ GenTreePtr Compiler::optAssertionPropLocal_RelOp(EXPSET_TP assertions, const Gen return nullptr; // Find an equal or not equal assertion about op1 var. - unsigned lclNum = op1->gtLclVarCommon.gtLclNum; noway_assert(lclNum < lvaCount); - unsigned index = optLocalAssertionIsEqualOrNotEqual(op1Kind, lclNum, op2Kind, cnsVal, assertions); + unsigned lclNum = op1->gtLclVarCommon.gtLclNum; noway_assert(lclNum < lvaCount); + AssertionIndex index = optLocalAssertionIsEqualOrNotEqual(op1Kind, lclNum, op2Kind, cnsVal, assertions); if (index == NO_ASSERTION_INDEX) { @@ -2952,7 +3036,7 @@ GenTreePtr Compiler::optAssertionPropLocal_RelOp(EXPSET_TP assertions, const Gen * * Returns the modified tree, or nullptr if no assertion prop took place. */ -GenTreePtr Compiler::optAssertionProp_Cast(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionProp_Cast(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { assert(tree->gtOper == GT_CAST); @@ -3050,7 +3134,7 @@ GenTreePtr Compiler::optAssertionProp_Cast(EXPSET_TP assertions, const GenTreePt * Given a tree with an array bounds check node, eliminate it because it was * checked already in the program. */ -GenTreePtr Compiler::optAssertionProp_Comma(EXPSET_TP assertions, +GenTreePtr Compiler::optAssertionProp_Comma(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { @@ -3059,13 +3143,20 @@ GenTreePtr Compiler::optAssertionProp_Comma(EXPSET_TP assertions, { // Since the GT_COMMA tree gets processed by assertion prop after the child GT_ARR_BOUNDS_CHECK // node in execution order, bounds check assertions will be included for the parent GT_COMMA node. - // Remove the assertion made by the bounds check tree about itself. - unsigned index = tree->gtGetOp1()->GetAssertion(); - EXPSET_TP newAssertions = (index != NO_ASSERTION_INDEX && optGetAssertion(index)->IsBoundsCheckNoThrow()) - ? 
assertions & ~optGetAssertionBit(index) - : assertions; - - return optAssertionProp_BndsChk(newAssertions, tree, stmt); + // Remove the assertion made by the bounds check tree about itself. Assertion only applies to + // "future" bounds checks. + AssertionIndex index = (AssertionIndex)tree->gtGetOp1()->GetAssertion(); + if (index != NO_ASSERTION_INDEX && optGetAssertion(index)->IsBoundsCheckNoThrow()) + { + BitVecOps::RemoveElemD(apTraits, assertions, index - 1); + GenTreePtr newTree = optAssertionProp_BndsChk(assertions, tree, stmt); + BitVecOps::AddElemD(apTraits, assertions, index - 1); + return newTree; + } + else + { + return optAssertionProp_BndsChk(assertions, tree, stmt); + } } return nullptr; } @@ -3080,7 +3171,7 @@ GenTreePtr Compiler::optAssertionProp_Comma(EXPSET_TP assertions, * */ -GenTreePtr Compiler::optAssertionProp_Ind(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { assert(tree->OperIsIndir()); @@ -3105,7 +3196,7 @@ GenTreePtr Compiler::optAssertionProp_Ind(EXPSET_TP assertions, const GenTreePt #ifdef DEBUG bool vnBased = false; - unsigned index = NO_ASSERTION_INDEX; + AssertionIndex index = NO_ASSERTION_INDEX; #endif if (optAssertionIsNonNull(op1, assertions DEBUGARG(&vnBased) DEBUGARG(&index))) { @@ -3138,7 +3229,7 @@ GenTreePtr Compiler::optAssertionProp_Ind(EXPSET_TP assertions, const GenTreePt * Note: If both VN and assertion table yield a matching assertion, "pVnBased" * is only set and the return value is "NO_ASSERTION_INDEX." */ -bool Compiler::optAssertionIsNonNull(GenTreePtr op, EXPSET_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(unsigned* pIndex)) +bool Compiler::optAssertionIsNonNull(GenTreePtr op, ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(AssertionIndex* pIndex)) { bool vnBased = (!optLocalAssertionProp && vnStore->IsKnownNonNull(op->gtVNPair.GetConservative())); #ifdef DEBUG @@ -3153,7 +3244,7 @@ bool Compiler::optAssertionIsNonNull(GenTreePtr op, EXPSET_TP assertions DEBUGAR return true; } - unsigned index = optAssertionIsNonNullInternal(op, assertions); + AssertionIndex index = optAssertionIsNonNullInternal(op, assertions); #ifdef DEBUG *pIndex = index; #endif @@ -3165,43 +3256,42 @@ bool Compiler::optAssertionIsNonNull(GenTreePtr op, EXPSET_TP assertions DEBUGAR * from the set of "assertions." * */ -unsigned Compiler::optAssertionIsNonNullInternal(GenTreePtr op, EXPSET_TP assertions) +Compiler::AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTreePtr op, ASSERT_VALARG_TP assertions) { // If local assertion prop use lcl comparison, else use VN comparison. if (!optLocalAssertionProp) { ValueNum vn = op->gtVNPair.GetConservative(); - if (assertions == 0) + if (BitVecOps::IsEmpty(apTraits, assertions)) { return NO_ASSERTION_INDEX; } // Check each assertion to find if we have a vn == or != null assertion. 
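The loops in these hunks all switch from shifting a 64-bit mask and testing every bit to walking only the members of the assertion set. A standalone sketch of that iteration idiom, again with std::bitset standing in for the BitVec types (the real BitVecOps::Iter also skips empty words, which this simplified version does not):

    // Illustrative only: visit set members and recover the 1-based assertion index.
    #include <bitset>
    #include <cstdio>

    constexpr unsigned kMaxAssertions = 128;
    using AssertSet = std::bitset<kMaxAssertions>;

    void VisitAssertions(const AssertSet& assertions, unsigned assertionCount)
    {
        for (unsigned bit = 0; bit < kMaxAssertions; bit++)
        {
            if (!assertions.test(bit))
            {
                continue;
            }
            unsigned index = bit + 1;        // bit i corresponds to assertion index i + 1
            if (index > assertionCount)
            {
                break;                       // mirrors the "index > optAssertionCount" guard
            }
            printf("assertion #%u is live\n", index);
        }
    }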
- unsigned index = 1; - for (EXPSET_TP mask = 1; index <= optAssertionCount; index++, mask <<= 1) + BitVecOps::Iter iter(apTraits, assertions); + unsigned index = 0; + while (iter.NextElem(apTraits, &index)) { - if (assertions & mask) + index++; + if (index > optAssertionCount) break; + AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index); + if (curAssertion->assertionKind != OAK_NOT_EQUAL) { - AssertionDsc* curAssertion = optGetAssertion(index); - if (curAssertion->assertionKind != OAK_NOT_EQUAL) - { - continue; - } - if (curAssertion->op1.vn != vn || curAssertion->op2.vn != ValueNumStore::VNForNull()) - { - continue; - } - return index; + continue; } + if (curAssertion->op1.vn != vn || curAssertion->op2.vn != ValueNumStore::VNForNull()) + { + continue; + } + return (AssertionIndex)index; } } else { unsigned lclNum = op->AsLclVarCommon()->GetLclNum(); // Check each assertion to find if we have a variable == or != null assertion. - unsigned index = 1; - for (EXPSET_TP mask = 1; index <= optAssertionCount; index++, mask <<= 1) + for (AssertionIndex index = 1; index <= optAssertionCount; index++) { AssertionDsc* curAssertion = optGetAssertion(index); if ((curAssertion->assertionKind == OAK_NOT_EQUAL) && // kind @@ -3216,6 +3306,48 @@ unsigned Compiler::optAssertionIsNonNullInternal(GenTreePtr op, EXPSET_TP assert } return NO_ASSERTION_INDEX; } +/***************************************************************************** + * + * Given a tree consisting of a call and a set of available assertions, we + * try to propagate a non-null assertion and modify the Call tree if we can. + * Returns the modified tree, or nullptr if no assertion prop took place. + * + */ +GenTreePtr Compiler::optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +{ + assert(tree->gtOper == GT_CALL); + if ((tree->gtFlags & GTF_CALL_NULLCHECK) == 0) + { + return nullptr; + } + GenTreePtr op1 = gtGetThisArg(tree); + noway_assert(op1 != nullptr); + if (op1->gtOper != GT_LCL_VAR) + { + return nullptr; + } + +#ifdef DEBUG + bool vnBased = false; + AssertionIndex index = NO_ASSERTION_INDEX; +#endif + if (optAssertionIsNonNull(op1, assertions DEBUGARG(&vnBased) DEBUGARG(&index))) + { +#ifdef DEBUG + if (verbose) + { + (vnBased) ? printf("\nVN based non-null prop in BB%02u:\n", compCurBB->bbNum) + : printf("\nNon-null prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum); + gtDispTree(tree, 0, nullptr, true); + } +#endif + tree->gtFlags &= ~GTF_CALL_NULLCHECK; + tree->gtFlags &= ~GTF_EXCEPT; + noway_assert(tree->gtFlags & GTF_SIDE_EFFECT); + return tree; + } + return nullptr; +} /***************************************************************************** * @@ -3229,39 +3361,13 @@ unsigned Compiler::optAssertionIsNonNullInternal(GenTreePtr op, EXPSET_TP assert * */ -GenTreePtr Compiler::optAssertionProp_Call(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { assert(tree->gtOper == GT_CALL); - if ((tree->gtFlags & GTF_CALL_NULLCHECK) != 0) - { - GenTreePtr op1 = gtGetThisArg(tree); - noway_assert(op1 != nullptr); - if (op1->gtOper != GT_LCL_VAR) - { - return nullptr; - } - -#ifdef DEBUG - bool vnBased = false; - unsigned index = NO_ASSERTION_INDEX; -#endif - if (optAssertionIsNonNull(op1, assertions DEBUGARG(&vnBased) DEBUGARG(&index))) - { -#ifdef DEBUG - if (verbose) - { - (vnBased) ? 
printf("\nVN based non-null prop in BB%02u:\n", compCurBB->bbNum) - : printf("\nNon-null prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum); - gtDispTree(tree, 0, nullptr, true); - } -#endif - tree->gtFlags &= ~GTF_CALL_NULLCHECK; - tree->gtFlags &= ~GTF_EXCEPT; - noway_assert(tree->gtFlags & GTF_SIDE_EFFECT); - - return optAssertionProp_Update(tree, tree, stmt); - } + if (optNonNullAssertionProp_Call(assertions, tree, stmt)) + { + return optAssertionProp_Update(tree, tree, stmt); } else if (!optLocalAssertionProp && (tree->gtCall.gtCallType == CT_HELPER)) { @@ -3316,7 +3422,7 @@ GenTreePtr Compiler::optAssertionProp_Call(EXPSET_TP assertions, const GenTreePt * Given a tree consisting of a comma node with a bounds check, remove any * redundant bounds check that has already been checked in the program flow. */ -GenTreePtr Compiler::optAssertionProp_BndsChk(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt) { if (optLocalAssertionProp) { @@ -3325,16 +3431,14 @@ GenTreePtr Compiler::optAssertionProp_BndsChk(EXPSET_TP assertions, const GenTre assert(tree->gtOper == GT_COMMA && tree->gtGetOp1()->OperGet() == GT_ARR_BOUNDS_CHECK); - unsigned index = 1; - for (EXPSET_TP mask = 1; index <= optAssertionCount; index++, mask <<= 1) + BitVecOps::Iter iter(apTraits, assertions); + unsigned index = 0; + while (iter.NextElem(apTraits, &index)) { - if ((assertions & mask) == 0) - { - continue; - } - + index++; + if (index > optAssertionCount) break; // If it is not a nothrow assertion, skip. - AssertionDsc* curAssertion = optGetAssertion(index); + AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index); if (!curAssertion->IsBoundsCheckNoThrow()) { continue; @@ -3420,7 +3524,10 @@ GenTreePtr Compiler::optAssertionProp_BndsChk(EXPSET_TP assertions, const GenTre GenTreePtr newTree = optAssertionProp_Update(tree, tree, stmt); if (newTree != nullptr) { - return optAssertionProp(assertions & ~mask, tree, stmt); + BitVecOps::RemoveElemD(apTraits, assertions, index - 1); + newTree = optAssertionProp(assertions, tree, stmt); + BitVecOps::AddElemD(apTraits, assertions, index - 1); + return newTree; } } return nullptr; @@ -3494,9 +3601,9 @@ GenTreePtr Compiler::optAssertionProp_Update(const GenTreePtr newTree, const Gen * Returns the modified tree, or nullptr if no assertion prop took place. */ -GenTreePtr Compiler::optAssertionProp(EXPSET_TP assertions, - const GenTreePtr tree, - const GenTreePtr stmt) +GenTreePtr Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, + const GenTreePtr tree, + const GenTreePtr stmt) { switch (tree->gtOper) { @@ -3530,84 +3637,81 @@ GenTreePtr Compiler::optAssertionProp(EXPSET_TP assertions, } } -/***************************************************************************** - * - * Given a tree node that makes an assertion - * this method computes the set of implied assertions that are also true - */ +//------------------------------------------------------------------------ +// optImpliedAssertions: Given a tree node that makes an assertion this +// method computes the set of implied assertions +// that are also true. The updated assertions are +// maintained on the Compiler object. +// +// Arguments: +// assertionIndex : The id of the assertion. +// activeAssertions : The assertions that are already true at this point. 
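A tiny worked example of what optImpliedAssertions buys (the source and variable names below are illustrative only): a copy assertion plus a constant assertion on the copied-from variable implies the same constant fact about the copy, so later uses can still be propagated even though only one of the two assertions was generated directly.

    // Illustrative source fragment; the implication is stated informally in comments.
    void Use(int);

    void Example(int x)
    {
        int y = x;          // generates copy assertion A1: "y == x"
        if (x == 3)         // the taken edge adds constant assertion A2: "x == 3"
        {
            // With A1 and A2 both active, "y == 3" is implied, so this use of y
            // can be constant-propagated; subrange assertions containing 3 follow too.
            Use(y);
        }
    }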
-EXPSET_TP Compiler::optImpliedAssertions(unsigned assertionIndex, EXPSET_TP activeAssertions) +void Compiler::optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& activeAssertions) { noway_assert(!optLocalAssertionProp); noway_assert(assertionIndex != 0); noway_assert(assertionIndex <= optAssertionCount); - AssertionDsc* curAssertion = this->optGetAssertion(assertionIndex); - if (activeAssertions) + AssertionDsc* curAssertion = optGetAssertion(assertionIndex); + if (!BitVecOps::IsEmpty(apTraits, activeAssertions)) { - EXPSET_TP chkAssertionMask = optGetVnMappedAssertions(curAssertion->op1.vn); - if (curAssertion->op2.kind == O2K_LCLVAR_COPY) - { - chkAssertionMask |= optGetVnMappedAssertions(curAssertion->op2.vn); - } - chkAssertionMask &= activeAssertions; - - if (!chkAssertionMask) + const ASSERT_TP mappedAssertions = optGetVnMappedAssertions(curAssertion->op1.vn); + if (mappedAssertions == NULL) { - return 0; + return; } - bool curIsCopyAssertion = curAssertion->IsCopyAssertion(); - EXPSET_TP result = 0; + ASSERT_TP chkAssertions = BitVecOps::MakeCopy(apTraits, mappedAssertions); - // Check each assertion in chkAssertionMask to see if it can be applied to curAssertion - EXPSET_TP singleBit = 1; - for (unsigned chkIndex = 1; chkIndex <= optAssertionCount; chkIndex++, singleBit <<= 1) + if (curAssertion->op2.kind == O2K_LCLVAR_COPY) { - // If assertions to check becomes empty skip. - if (!chkAssertionMask) - { - break; - } - - // Is iterated assertion in assertions to be checked? - if (!(singleBit & chkAssertionMask)) + const ASSERT_TP op2Assertions = optGetVnMappedAssertions(curAssertion->op2.vn); + if (op2Assertions != NULL) { - continue; + BitVecOps::UnionD(apTraits, chkAssertions, op2Assertions); } + } + BitVecOps::IntersectionD(apTraits, chkAssertions, activeAssertions); - // Clear the iterated assertion from assertions to be checked. - chkAssertionMask &= ~singleBit; + if (BitVecOps::IsEmpty(apTraits, chkAssertions)) + { + return; + } + // Check each assertion in chkAssertions to see if it can be applied to curAssertion + BitVecOps::Iter chkIter(apTraits, chkAssertions); + unsigned chkIndex = 0; + while (chkIter.NextElem(apTraits, &chkIndex)) + { + chkIndex++; + if (chkIndex > optAssertionCount) break; if (chkIndex == assertionIndex) { continue; } // Determine which one is a copy assertion and use the other to check for implied assertions. - AssertionDsc* iterAssertion = optGetAssertion(chkIndex); - if (curIsCopyAssertion) + AssertionDsc* iterAssertion = optGetAssertion((AssertionIndex)chkIndex); + if (curAssertion->IsCopyAssertion()) { - result |= optImpliedByCopyAssertion(curAssertion, iterAssertion); + optImpliedByCopyAssertion(curAssertion, iterAssertion, activeAssertions); } else if (iterAssertion->IsCopyAssertion()) { - result |= optImpliedByCopyAssertion(iterAssertion, curAssertion); + optImpliedByCopyAssertion(iterAssertion, curAssertion, activeAssertions); } + } - return result; } - // Is curAssertion a constant assignment of a 32-bit integer? 
// (i.e GT_LVL_VAR X == GT_CNS_INT) - if ((curAssertion->assertionKind == OAK_EQUAL) && + else if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) && (curAssertion->op2.kind == O2K_CONST_INT)) { - return optImpliedByConstAssertion(curAssertion); + optImpliedByConstAssertion(curAssertion, activeAssertions); } - - return NO_ASSERTION_INDEX; } /***************************************************************************** @@ -3616,38 +3720,22 @@ EXPSET_TP Compiler::optImpliedAssertions(unsigned assertionIndex, EXPSET_TP acti * of non-Null implied assertions that are also true */ -EXPSET_TP Compiler::optImpliedByTypeOfAssertions(EXPSET_TP activeAssertions) +void Compiler::optImpliedByTypeOfAssertions(ASSERT_TP& activeAssertions) { - EXPSET_TP result = 0; - - if (activeAssertions == 0) + if (BitVecOps::IsEmpty(apTraits, activeAssertions)) { - return result; + return; } // Check each assertion in activeAssertions to see if it can be applied to constAssertion - EXPSET_TP singleBit = 1; - - for (unsigned chkIndex = 1; chkIndex <= optAssertionCount; chkIndex++, singleBit <<= 1) + BitVecOps::Iter chkIter(apTraits, activeAssertions); + unsigned chkIndex = 0; + while (chkIter.NextElem(apTraits, &chkIndex)) { - if (!activeAssertions) - { - break; - } - - assert(singleBit == optGetAssertionBit(chkIndex)); - - // If assertion not currently in active set, skip. - if (!(singleBit & activeAssertions)) - { - continue; - } - - // Remove the singleBit from the chkAssertionMask - activeAssertions &= ~singleBit; - + chkIndex++; + if (chkIndex > optAssertionCount) break; // chkAssertion must be Type/Subtype is equal assertion - AssertionDsc* chkAssertion = optGetAssertion(chkIndex); + AssertionDsc* chkAssertion = optGetAssertion((AssertionIndex)chkIndex); if ((chkAssertion->op1.kind != O1K_SUBTYPE && chkAssertion->op1.kind != O1K_EXACT_TYPE) || (chkAssertion->assertionKind != OAK_EQUAL)) { @@ -3657,7 +3745,7 @@ EXPSET_TP Compiler::optImpliedByTypeOfAssertions(EXPSET_TP activeAssertions) // Search the assertion table for a non-null assertion on op1 that matches chkAssertion for (unsigned impIndex = 1; impIndex <= optAssertionCount; impIndex++) { - AssertionDsc* impAssertion = optGetAssertion(impIndex); + AssertionDsc* impAssertion = optGetAssertion((AssertionIndex)impIndex); // The impAssertion must be different from the chkAssertion if (impIndex == chkIndex) @@ -3674,12 +3762,10 @@ EXPSET_TP Compiler::optImpliedByTypeOfAssertions(EXPSET_TP activeAssertions) continue; } - EXPSET_TP impAssertBit = optGetAssertionBit(impIndex); - // The bit may already be in the result set - if ((result & impAssertBit) == 0) + if (!BitVecOps::IsMember(apTraits, activeAssertions, impIndex - 1)) { - result |= optGetAssertionBit(impIndex); + BitVecOps::AddElemD(apTraits, activeAssertions, impIndex - 1); #ifdef DEBUG if (verbose) { @@ -3693,24 +3779,27 @@ EXPSET_TP Compiler::optImpliedByTypeOfAssertions(EXPSET_TP activeAssertions) break; } } - - // Note 'result' will either be zero if no new assertions are implied - // or have exactly one bit set, representing the new implied assertion - return result; } -/***************************************************************************** - * Given a value number "vn", get the assertions we have about "vn." - */ +//------------------------------------------------------------------------ +// optGetVnMappedAssertions: Given a value number, get the assertions +// we have about the value number. +// +// Arguments: +// vn - The given value number. 
+// +// Return Value: +// The assertions we have about the value number. +// -EXPSET_TP Compiler::optGetVnMappedAssertions(ValueNum vn) +ASSERT_VALRET_TP Compiler::optGetVnMappedAssertions(ValueNum vn) { - EXPSET_TP set = 0; + ASSERT_TP set = BitVecOps::UninitVal(); if (optValueNumToAsserts->Lookup(vn, &set)) { return set; } - return 0; + return BitVecOps::UninitVal(); } /***************************************************************************** @@ -3719,42 +3808,29 @@ EXPSET_TP Compiler::optGetVnMappedAssertions(ValueNum vn) * that are also true */ -EXPSET_TP Compiler::optImpliedByConstAssertion(AssertionDsc* constAssertion) +void Compiler::optImpliedByConstAssertion(AssertionDsc* constAssertion, ASSERT_TP& result) { noway_assert(constAssertion->assertionKind == OAK_EQUAL); noway_assert(constAssertion->op1.kind == O1K_LCLVAR); noway_assert(constAssertion->op2.kind == O2K_CONST_INT); - EXPSET_TP result = 0; ssize_t iconVal = constAssertion->op2.u1.iconVal; - // chkAssertionMask includes op1's lvAssertionDep - EXPSET_TP chkAssertionMask = optGetVnMappedAssertions(constAssertion->op1.vn); - if (!chkAssertionMask) + const ASSERT_TP chkAssertions = optGetVnMappedAssertions(constAssertion->op1.vn); + if (chkAssertions == NULL || BitVecOps::IsEmpty(apTraits, chkAssertions)) { - return result; + return; } - // Check each assertion in chkAssertionMask to see if it can be applied to constAssertion - EXPSET_TP singleBit = 1; - for (unsigned chkIndex = 1; chkIndex <= optAssertionCount; chkIndex++, singleBit <<= 1) + // Check each assertion in chkAssertions to see if it can be applied to constAssertion + BitVecOps::Iter chkIter(apTraits, chkAssertions); + unsigned chkIndex = 0; + while (chkIter.NextElem(apTraits, &chkIndex)) { - // Are there no more bits in chkAssertionMask ? - if (!chkAssertionMask) - { - break; - } - - if (!(singleBit & chkAssertionMask)) - { - continue; - } - - // Remove the singleBit from the chkAssertionMask. - chkAssertionMask &= ~singleBit; - + chkIndex++; + if (chkIndex > optAssertionCount) break; // The impAssertion must be different from the const assertion. - AssertionDsc* impAssertion = optGetAssertion(chkIndex); + AssertionDsc* impAssertion = optGetAssertion((AssertionIndex)chkIndex); if (impAssertion == constAssertion) { continue; @@ -3786,8 +3862,8 @@ EXPSET_TP Compiler::optImpliedByConstAssertion(AssertionDsc* constAssertion) } if (usable) - { - result |= optGetAssertionBit(chkIndex); + { + BitVecOps::AddElemD(apTraits, result, chkIndex - 1); #ifdef DEBUG if (verbose) { @@ -3799,7 +3875,6 @@ EXPSET_TP Compiler::optImpliedByConstAssertion(AssertionDsc* constAssertion) #endif } } - return result; } @@ -3812,8 +3887,8 @@ EXPSET_TP Compiler::optImpliedByConstAssertion(AssertionDsc* constAssertion) * we don't have kill sets and we depend on their value num for dataflow. */ -EXPSET_TP Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion) -{ +void Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result) +{ noway_assert(copyAssertion->IsCopyAssertion()); // Get the copyAssert's lcl/ssa nums. @@ -3849,7 +3924,7 @@ EXPSET_TP Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, Asser if (copyAssertLclNum == BAD_VAR_NUM || copyAssertSsaNum == SsaConfig::RESERVED_SSA_NUM) { - return 0; + return; } // Get the depAssert's lcl/ssa nums. 
@@ -3875,7 +3950,7 @@ EXPSET_TP Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, Asser if (depAssertLclNum == BAD_VAR_NUM || depAssertSsaNum == SsaConfig::RESERVED_SSA_NUM) { - return 0; + return; } // Is depAssertion a constant assignment of a 32-bit integer? @@ -3886,8 +3961,7 @@ EXPSET_TP Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, Asser // Search the assertion table for an assertion on op1 that matches depAssertion // The matching assertion is the implied assertion. - EXPSET_TP result = 0; - for (unsigned impIndex = 1; impIndex <= optAssertionCount; impIndex++) + for (AssertionIndex impIndex = 1; impIndex <= optAssertionCount; impIndex++) { AssertionDsc* impAssertion = optGetAssertion(impIndex); @@ -3951,8 +4025,8 @@ EXPSET_TP Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, Asser } if (usable) - { - result |= optGetAssertionBit(impIndex); + { + BitVecOps::AddElemD(apTraits, result, impIndex - 1); #ifdef DEBUG if (verbose) @@ -3965,13 +4039,11 @@ EXPSET_TP Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, Asser // If the depAssertion is a const assertion then any other assertions that it implies could also imply a subrange assertion. if (depIsConstAssertion) { - result |= optImpliedByConstAssertion(impAssertion); + optImpliedByConstAssertion(impAssertion, result); } } } - - return result; } #include "dataflow.h" @@ -3984,80 +4056,114 @@ EXPSET_TP Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, Asser class AssertionPropFlowCallback { private: - EXPSET_TP preMergeOut; - EXPSET_TP postMergeOut; - - EXPSET_TP preMergeJumpDestOut; - EXPSET_TP postMergeJumpDestOut; + ASSERT_TP preMergeOut; + ASSERT_TP preMergeJumpDestOut; - EXPSET_TP* mJumpDestOut; - EXPSET_TP* mJumpDestGen; + ASSERT_TP* mJumpDestOut; + ASSERT_TP* mJumpDestGen; - Compiler* m_pCompiler; + Compiler* m_pCompiler; + BitVecTraits* apTraits; public: - AssertionPropFlowCallback(Compiler* pCompiler, EXPSET_TP* jumpDestOut, EXPSET_TP* jumpDestGen) - : mJumpDestOut(jumpDestOut) + AssertionPropFlowCallback(Compiler* pCompiler, ASSERT_TP* jumpDestOut, ASSERT_TP* jumpDestGen) + : preMergeOut(BitVecOps::UninitVal()) + , preMergeJumpDestOut(BitVecOps::UninitVal()) + , mJumpDestOut(jumpDestOut) , mJumpDestGen(jumpDestGen) , m_pCompiler(pCompiler) - {} + , apTraits(pCompiler->apTraits) + { + } // At the start of the merge function of the dataflow equations, initialize premerge state (to detect change.) void StartMerge(BasicBlock* block) { - JITDUMP("AssertionPropCallback::StartMerge: BB%02u in -> %016I64X\n", block->bbNum, block->bbAssertionIn); - preMergeOut = block->bbAssertionOut; - preMergeJumpDestOut = mJumpDestOut[block->bbNum]; + JITDUMP("AssertionPropCallback::StartMerge: BB%02d in -> %s\n", + block->bbNum, BitVecOps::ToString(apTraits, block->bbAssertionIn)); + BitVecOps::Assign(apTraits, preMergeOut, block->bbAssertionOut); + BitVecOps::Assign(apTraits, preMergeJumpDestOut, mJumpDestOut[block->bbNum]); } // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags. void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds) { - EXPSET_TP* pAssertionOut = ((predBlock->bbJumpKind == BBJ_COND) && (predBlock->bbJumpDest == block)) ? 
&mJumpDestOut[predBlock->bbNum] : &predBlock->bbAssertionOut; - JITDUMP("AssertionPropCallback::Merge : BB%02u in -> %016I64X, pred BB%02u out -> %016I64X\n", block->bbNum, block->bbAssertionIn, predBlock->bbNum, predBlock->bbAssertionOut); - block->bbAssertionIn &= *pAssertionOut; + ASSERT_TP pAssertionOut = ((predBlock->bbJumpKind == BBJ_COND) && (predBlock->bbJumpDest == block)) + ? mJumpDestOut[predBlock->bbNum] : predBlock->bbAssertionOut; + JITDUMP("AssertionPropCallback::Merge : BB%02d in -> %s, predBlock BB%02d out -> %s\n", + block->bbNum, BitVecOps::ToString(apTraits, block->bbAssertionIn), + predBlock->bbNum, BitVecOps::ToString(apTraits, predBlock->bbAssertionOut)); + BitVecOps::IntersectionD(apTraits, block->bbAssertionIn, pAssertionOut); } // At the end of the merge store results of the dataflow equations, in a postmerge state. - void EndMerge(BasicBlock* block) + bool EndMerge(BasicBlock* block) { - JITDUMP("AssertionPropCallback::EndMerge : BB%02u in -> %016I64X\n\n", block->bbNum, block->bbAssertionIn); - postMergeOut = block->bbAssertionOut & (block->bbAssertionGen | block->bbAssertionIn); - postMergeJumpDestOut = mJumpDestOut[block->bbNum] & (mJumpDestGen[block->bbNum] | block->bbAssertionIn); - } + JITDUMP("AssertionPropCallback::EndMerge : BB%02d in -> %s\n\n", block->bbNum, + BitVecOps::ToString(apTraits, block->bbAssertionIn)); + + // PERF: eliminate this tmp by passing in a OperationTree (AST) to the bitset, + // so the expr tree is operated on a single bit level. See "expression templates." + ASSERT_TP tmp = BitVecOps::MakeCopy(apTraits, block->bbAssertionIn); + BitVecOps::UnionD(apTraits, tmp, block->bbAssertionGen); + BitVecOps::IntersectionD(apTraits, block->bbAssertionOut, tmp); - // Check if anything changed by comparing premerge and postmerge states. - bool Changed(BasicBlock* block) - { - JITDUMP("AssertionPropCallback::Changed : BB%02u before out -> %016I64X; after out -> %016I64X;\n\t\tjumpDest before out -> %016I64X; jumpDest after out -> %016I64X;\n\n", block->bbNum, preMergeOut, postMergeOut, preMergeJumpDestOut, postMergeJumpDestOut); - return (postMergeOut != preMergeOut) || (postMergeJumpDestOut != preMergeJumpDestOut); - } + BitVecOps::Assign(apTraits, tmp, block->bbAssertionIn); + BitVecOps::UnionD(apTraits, tmp, mJumpDestGen[block->bbNum]); + BitVecOps::IntersectionD(apTraits, mJumpDestOut[block->bbNum], tmp); - // Finish any updates to the basic blocks after the merge. - DataFlow::UpdateResult Update(BasicBlock* block) - { - block->bbAssertionOut = postMergeOut; - mJumpDestOut[block->bbNum] = postMergeJumpDestOut; - return DataFlow::ContinueAnalysis; - } + bool changed = (!BitVecOps::Equal(apTraits, preMergeOut, block->bbAssertionOut) || + !BitVecOps::Equal(apTraits, preMergeJumpDestOut, mJumpDestOut[block->bbNum])); - // Reset the analysis to start from the beginning. Unused for assertion prop. 
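Rewritten over bit-vector sets, StartMerge/Merge/EndMerge implement a standard forward "must" dataflow: IN[b] is the intersection of the predecessors' OUT sets (using jumpDestOut for the taken edge of a BBJ_COND predecessor), and OUT[b] is narrowed to OUT[b] AND (GEN[b] OR IN[b]). A compact restatement with std::bitset standing in for ASSERT_TP (illustrative, not CoreCLR code):

    #include <bitset>
    #include <vector>

    using AssertSet = std::bitset<128>;

    // Returns true if OUT changed, which is what drives the fixed-point iteration,
    // mirroring EndMerge's boolean result.
    bool MergeBlock(AssertSet& in, AssertSet& out, const AssertSet& gen,
                    const std::vector<AssertSet>& predOuts)
    {
        for (const AssertSet& predOut : predOuts)
        {
            in &= predOut;                  // an assertion survives only if every path proves it
        }
        AssertSet newOut = out & (gen | in);
        bool changed = (newOut != out);
        out = newOut;
        return changed;
    }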
- void Reset() - { - assert(false); + if (changed) + { + JITDUMP("AssertionPropCallback::Changed : BB%02d before out -> %s; after out -> %s;\n" + "\t\tjumpDest before out -> %s; jumpDest after out -> %s;\n\n", + block->bbNum, + BitVecOps::ToString(apTraits, preMergeOut), + BitVecOps::ToString(apTraits, block->bbAssertionOut), + BitVecOps::ToString(apTraits, preMergeJumpDestOut), + BitVecOps::ToString(apTraits, mJumpDestOut[block->bbNum])); + } + else + { + JITDUMP("AssertionPropCallback::Unchanged : BB%02d out -> %s; \t\tjumpDest out -> %s\n\n", + block->bbNum, + BitVecOps::ToString(apTraits, block->bbAssertionOut), + BitVecOps::ToString(apTraits, mJumpDestOut[block->bbNum])); + } + + return changed; } }; +ASSERT_VALRET_TP Compiler::optNewFullAssertSet() +{ + return BitVecOps::MakeCopy(apTraits, apFull); +} + +ASSERT_VALRET_TP Compiler::optNewEmptyAssertSet() +{ + return BitVecOps::MakeCopy(apTraits, apEmpty); +} + /***************************************************************************** * * Compute the assertions generated by each block. */ -void Compiler::optComputeAssertionGen(EXPSET_TP* jumpDestGen) +ASSERT_TP* Compiler::optComputeAssertionGen() { + ASSERT_TP* jumpDestGen = fgAllocateTypeForEachBlk<ASSERT_TP>(); + + ASSERT_TP valueGen = BitVecOps::MakeEmpty(apTraits); + ASSERT_TP jumpDestValueGen = BitVecOps::MakeEmpty(apTraits); + for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { - EXPSET_TP valueGen = 0; - EXPSET_TP jumpDestValueGen = 0; + jumpDestGen[block->bbNum] = BitVecOps::MakeEmpty(apTraits); + + BitVecOps::ClearD(apTraits, valueGen); + BitVecOps::ClearD(apTraits, jumpDestValueGen); // Walk the statement trees in this basic block. for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) @@ -4069,7 +4175,7 @@ void Compiler::optComputeAssertionGen(EXPSET_TP* jumpDestGen) // Store whatever we have accumulated into jumpDest edge's valueGen. if (tree->gtOper == GT_JTRUE) { - jumpDestValueGen = valueGen; + BitVecOps::Assign(apTraits, jumpDestValueGen, valueGen); } if (!tree->HasAssertion()) { @@ -4078,9 +4184,9 @@ void Compiler::optComputeAssertionGen(EXPSET_TP* jumpDestGen) // For regular trees, just update valueGen. For GT_JTRUE, for false part, // update valueGen and true part update jumpDestValueGen. - unsigned int assertionIndex[2] = { - tree->GetAssertion(), - (tree->OperGet() == GT_JTRUE) ? optFindComplementary(tree->GetAssertion()) : 0 + AssertionIndex assertionIndex[2] = { + (AssertionIndex)tree->GetAssertion(), + (tree->OperGet() == GT_JTRUE) ? optFindComplementary((AssertionIndex)tree->GetAssertion()) : 0 }; for (unsigned i = 0; i < 2; ++i) @@ -4088,29 +4194,30 @@ void Compiler::optComputeAssertionGen(EXPSET_TP* jumpDestGen) if (assertionIndex[i] > 0) { // If GT_JTRUE, and true part use jumpDestValueGen. - EXPSET_TP& gen = (i == 0 && tree->OperGet() == GT_JTRUE) ? jumpDestValueGen : valueGen; - EXPSET_TP assertBits = optGetAssertionBit(assertionIndex[i]); - assertBits |= optImpliedAssertions(assertionIndex[i], gen); - gen |= assertBits; + ASSERT_TP& gen = (i == 0 && tree->OperGet() == GT_JTRUE) ? 
jumpDestValueGen : valueGen; + optImpliedAssertions(assertionIndex[i], gen); + BitVecOps::AddElemD(apTraits, gen, assertionIndex[i] - 1); } } } } - block->bbAssertionGen = valueGen; - jumpDestGen[block->bbNum] = jumpDestValueGen; + BitVecOps::Assign(apTraits, block->bbAssertionGen, valueGen); + BitVecOps::Assign(apTraits, jumpDestGen[block->bbNum], jumpDestValueGen); #ifdef DEBUG if (verbose) { - printf("\nBB%02u valueGen = %s", block->bbNum, genES2str(valueGen)); + printf("\nBB%02u valueGen = %s", block->bbNum, BitVecOps::ToString(apTraits, valueGen)); if (block->bbJumpKind == BBJ_COND) { - printf(" => BB%02u valueGen = %s,", block->bbJumpDest->bbNum, genES2str(jumpDestValueGen)); + printf(" => BB%02u valueGen = %s,", block->bbJumpDest->bbNum, BitVecOps::ToString(apTraits, jumpDestValueGen)); } } #endif + } + return jumpDestGen; } /***************************************************************************** @@ -4118,19 +4225,348 @@ void Compiler::optComputeAssertionGen(EXPSET_TP* jumpDestGen) * Initialize the assertion data flow flags that will be propagated. */ -void Compiler::optInitAssertionDataflowFlags(EXPSET_TP* jumpDestOut, EXPSET_TP* jumpDestGen) +ASSERT_TP* Compiler::optInitAssertionDataflowFlags() { + ASSERT_TP* jumpDestOut = fgAllocateTypeForEachBlk<ASSERT_TP>(); + // Initially estimate the OUT sets to everything except killed expressions - // Also set the IN sets to 1, so that we can perform the intersection + // Also set the IN sets to 1, so that we can perform the intersection. + // Also, zero-out the flags for handler blocks, as we could be in the + // handler due to an exception bypassing the regular program flow which + // actually generates assertions along the bbAssertionOut/jumpDestOut + // edges. for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { - block->bbAssertionOut = ((EXPSET_TP) -1); - block->bbAssertionIn = ((EXPSET_TP) -1); - jumpDestOut[block->bbNum] = ((EXPSET_TP) -1); + block->bbAssertionIn = bbIsHandlerBeg(block) ? optNewEmptyAssertSet() : optNewFullAssertSet(); + block->bbAssertionGen = optNewEmptyAssertSet(); + block->bbAssertionOut = optNewFullAssertSet(); + jumpDestOut[block->bbNum] = optNewEmptyAssertSet(); + BitVecOps::Assign(apTraits, jumpDestOut[block->bbNum], apFull); } // Compute the data flow values for all tracked expressions // IN and OUT never change for the initial basic block B1 - fgFirstBB->bbAssertionIn = 0; + BitVecOps::Assign(apTraits, fgFirstBB->bbAssertionIn, apEmpty); + return jumpDestOut; +} + +// Callback data for the VN based constant prop visitor. +struct VNAssertionPropVisitorInfo +{ + Compiler* pThis; + GenTreePtr stmt; + BasicBlock* block; + VNAssertionPropVisitorInfo(Compiler* pThis, BasicBlock* block, GenTreePtr stmt) + : pThis(pThis) + , stmt(stmt) + , block(block) + { } +}; + +//------------------------------------------------------------------------------ +// optPrepareTreeForReplacement +// Updates ref counts and extracts side effects from a tree so it can be +// replaced with a comma separated list of side effects + a new tree. +// +// Note: +// The old and new trees may be the same. In this case, the tree will be +// appended to the side-effect list (if present) and returned. +// +// Arguments: +// oldTree - The tree node to be dropped from the stmt expr. +// newTree - The tree node to append to the side effect list from "oldTree". +// +// Return Value: +// Returns a comma separated list of side-effects present in the "oldTree". +// When "newTree" is non-null: +// 1. 
When side-effects are present in oldTree, newTree will be appended to the +// comma separated list. +// 2. When no side effects are present, then returns the "newTree" without +// any list. +// When "newTree" is null: +// 1. Returns the extracted side-effects from "oldTree" +// 2. When no side-effects are present, returns null. +// +// Description: +// Decrements ref counts for the "oldTree" that is going to be replaced. If there +// are side effects in the tree, then ref counts for variables in the side effects +// are incremented because they need to be kept in the stmt expr. +// +// Either the "newTree" is returned when no side effects are present or a comma +// separated side effect list with "newTree" is returned. +// +GenTreePtr Compiler::optPrepareTreeForReplacement(GenTreePtr oldTree, GenTreePtr newTree) +{ + // If we have side effects, extract them and append newTree to the list. + GenTreePtr sideEffList = nullptr; + if (oldTree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) + { + gtExtractSideEffList(oldTree, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE); + } + if (sideEffList) + { + noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT); + + // Increment the ref counts as we want to keep the side effects. + lvaRecursiveIncRefCounts(sideEffList); + + if (newTree) + { + newTree = gtNewOperNode(GT_COMMA, newTree->TypeGet(), sideEffList, newTree); + } + else + { + newTree = sideEffList; + } + } + + // Decrement the ref counts as the oldTree is going to be dropped. + lvaRecursiveDecRefCounts(oldTree); + return newTree; +} + +//------------------------------------------------------------------------------ +// optVNConstantPropOnJTrue +// Constant propagate on the JTrue node by extracting side effects and moving +// them into their own statements. The relop node is then modified to yield +// true or false, so the branch can be folded. +// +// Arguments: +// block - The block that contains the JTrue. +// stmt - The JTrue stmt which can be evaluated to a constant. +// tree - The JTrue node whose relop evaluates to 0 or non-zero value. +// +// Return Value: +// The jmpTrue tree node that has relop of the form "0 =/!= 0". +// If "tree" evaluates to "true" relop is "0 == 0". Else relop is "0 != 0". +// +// Description: +// Special treatment for JTRUE nodes' constant propagation. This is because +// for JTRUE(1) or JTRUE(0), if there are side effects they need to be put +// in separate statements. This is to prevent relop's constant +// propagation from doing a simple minded conversion from +// (1) STMT(JTRUE(RELOP(COMMA(sideEffect, OP1), OP2)), S.T. op1 =/!= op2 to +// (2) STMT(JTRUE(COMMA(sideEffect, 1/0)). +// +// fgFoldConditional doesn't fold (2), a side-effecting JTRUE's op1. So, let us, +// here, convert (1) as two statements: STMT(sideEffect), STMT(JTRUE(1/0)), +// so that the JTRUE will get folded by fgFoldConditional. +// +// Note: fgFoldConditional is called from other places as well, which may be +// sensitive to adding new statements. Hence the change is not made directly +// into fgFoldConditional. +// +GenTreePtr Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTreePtr stmt, GenTreePtr test) +{ + GenTreePtr relop = test->gtGetOp1(); + + // VN based assertion non-null on this relop has been performed. + if (!relop->OperIsCompare()) + { + return nullptr; + } + + // + // Make sure GTF_RELOP_JMP_USED flag is set so that we can later skip constant + // prop'ing a JTRUE's relop child node for a second time in the pre-order + // tree walk. 
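A standalone sketch of the relop rewrite described in the comment above (illustrative types, not GenTree): once the relop's value number is known to be constant and its side effects have been hoisted into separate statements, the compare is rewritten into a trivially foldable form.

enum Oper { EQ, NE };

struct Relop
{
    Oper oper;
    int  op1;
    int  op2;
};

// Rewrite a compare whose result is already known: "0 == 0" when it evaluates
// to true, "0 != 0" when it evaluates to false, so fgFoldConditional-style
// folding can then remove the dead edge of the JTRUE.
inline void FoldKnownRelop(Relop& relop, bool evalsToTrue)
{
    relop.op1  = 0;
    relop.op2  = 0;
    relop.oper = evalsToTrue ? EQ : NE;
}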
+ // + assert((relop->gtFlags & GTF_RELOP_JMP_USED) != 0); + + if (!vnStore->IsVNConstant(relop->gtVNPair.GetConservative())) + { + return nullptr; + } + + // Prepare the tree for replacement so any side effects can be extracted. + GenTreePtr sideEffList = optPrepareTreeForReplacement(test, nullptr); + + while (sideEffList) + { + GenTreePtr newStmt; + if (sideEffList->OperGet() == GT_COMMA) + { + newStmt = fgInsertStmtNearEnd(block, sideEffList->gtGetOp1()); + sideEffList = sideEffList->gtGetOp2(); + } + else + { + newStmt = fgInsertStmtNearEnd(block, sideEffList); + sideEffList = nullptr; + } + fgMorphBlockStmt(block, newStmt DEBUGARG(__FUNCTION__)); + gtSetStmtInfo(newStmt); + fgSetStmtSeq(newStmt); + } + + // Transform the relop's operands to be both zeroes. + ValueNum vnZero = vnStore->VNZeroForType(TYP_INT); + relop->gtOp.gtOp1 = gtNewIconNode(0); + relop->gtOp.gtOp1->gtVNPair = ValueNumPair(vnZero, vnZero); + relop->gtOp.gtOp2 = gtNewIconNode(0); + relop->gtOp.gtOp2->gtVNPair = ValueNumPair(vnZero, vnZero); + + // Update the oper and restore the value numbers. + ValueNum vnCns = relop->gtVNPair.GetConservative(); + ValueNum vnLib = relop->gtVNPair.GetLiberal(); + bool evalsToTrue = vnStore->CoercedConstantValue<INT64>(vnCns) != 0; + relop->SetOper(evalsToTrue ? GT_EQ : GT_NE); + relop->gtVNPair = ValueNumPair(vnLib, vnCns); + + return test; +} + +//------------------------------------------------------------------------------ +// optVNConstantPropCurStmt +// Performs constant prop on the current statement's tree nodes. +// +// Assumption: +// This function is called as part of a pre-order tree walk. +// +// Arguments: +// tree - The currently visited tree node. +// stmt - The statement node in which the "tree" is present. +// block - The block that contains the statement that contains the tree. +// +// Return Value: +// Returns the standard visitor walk result. +// +// Description: +// Checks if a node is an R-value and evaluates to a constant. If the node +// evaluates to constant, then the tree is replaced by its side effects and +// the constant node. +// +Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree) +{ + // Don't propagate floating-point constants into a TYP_STRUCT LclVar + // This can occur for HFA return values (see hfa_sf3E_r.exe) + if (tree->TypeGet() == TYP_STRUCT) + { + return WALK_CONTINUE; + } + + switch (tree->OperGet()) + { + // Make sure we have an R-value. + case GT_ADD: case GT_SUB: case GT_DIV: case GT_MOD: case GT_UDIV: case GT_UMOD: case GT_MULHI: + case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: + case GT_OR: case GT_XOR: case GT_AND: + case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_NEG: case GT_CHS: case GT_CAST: + case GT_INTRINSIC: + break; + + case GT_JTRUE: + break; + + case GT_MUL: + // Don't transform long multiplies. + if (tree->gtFlags & GTF_MUL_64RSLT) + { + return WALK_SKIP_SUBTREES; + } + break; + + case GT_LCL_VAR: + // Make sure the local variable is an R-value. + if ((tree->gtFlags & (GTF_VAR_DEF | GTF_DONT_CSE))) + { + return WALK_CONTINUE; + } +#if FEATURE_ANYCSE + // Let's not conflict with CSE (to save the movw/movt). + if (lclNumIsCSE(tree->AsLclVarCommon()->GetLclNum())) + { + return WALK_CONTINUE; + } +#endif + break; + + default: + // Unknown node, continue to walk. 
+ return WALK_CONTINUE; + } + + // Perform the constant propagation + GenTreePtr newTree = optVNConstantPropOnTree(block, stmt, tree); + if (newTree == nullptr) + { + // Not propagated, keep going. + return WALK_CONTINUE; + } + + // Successful propagation, mark as assertion propagated and skip + // sub-tree (with side-effects) visits. + optAssertionProp_Update(newTree, tree, stmt); + + JITDUMP("After constant propagation on [%06u]:\n", tree->gtTreeID); + DBEXEC(VERBOSE, gtDispTree(stmt)); + + return WALK_SKIP_SUBTREES; +} + +//------------------------------------------------------------------------------ +// optVnNonNullPropCurStmt +// Performs VN based non-null propagation on the tree node. +// +// Assumption: +// This function is called as part of a pre-order tree walk. +// +// Arguments: +// block - The block that contains the statement that contains the tree. +// stmt - The statement node in which the "tree" is present. +// tree - The currently visited tree node. +// +// Return Value: +// None. +// +// Description: +// Performs value number based non-null propagation on GT_CALL and +// GT_IND/GT_NULLCHECK. This is different from flow based assertions and helps +// unify VN based constant prop and non-null prop in a single pre-order walk. +// +void Compiler::optVnNonNullPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree) +{ + ASSERT_TP empty = BitVecOps::MakeEmpty(apTraits); + GenTreePtr newTree = nullptr; + if (tree->OperGet() == GT_CALL) + { + newTree = optNonNullAssertionProp_Call(empty, tree, stmt); + } + else if (tree->OperGet() == GT_IND || tree->OperGet() == GT_NULLCHECK) + { + newTree = optAssertionProp_Ind(empty, tree, stmt); + } + if (newTree) + { + assert(newTree == tree); + optAssertionProp_Update(newTree, tree, stmt); + } +} + +//------------------------------------------------------------------------------ +// optVNAssertionPropCurStmtVisitor +// Unified Value Numbering based assertion propagation visitor. +// +// Assumption: +// This function is called as part of a pre-order tree walk. +// +// Return Value: +// WALK_RESULTs. +// +// Description: +// An unified value numbering based assertion prop visitor that +// performs non-null and constant assertion propagation based on +// value numbers. +// +/* static */ +Compiler::fgWalkResult Compiler::optVNAssertionPropCurStmtVisitor(GenTreePtr* ppTree, fgWalkData* data) +{ + VNAssertionPropVisitorInfo* pData = (VNAssertionPropVisitorInfo*) data->pCallbackData; + Compiler* pThis = pData->pThis; + + pThis->optVnNonNullPropCurStmt(pData->block, pData->stmt, *ppTree); + + return pThis->optVNConstantPropCurStmt(pData->block, pData->stmt, *ppTree); } /***************************************************************************** @@ -4157,16 +4593,11 @@ GenTreePtr Compiler::optVNAssertionPropCurStmt(BasicBlock* block, GenTreePtr stm // Perform VN based assertion prop first, in case we don't find // anything in assertion gen. 
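The unified visitor above relies on a pre-order walk that carries per-statement context to a single callback; its shape is roughly the following standalone sketch (stand-in types, not the JIT's fgWalkTreePre).

struct Node
{
    Node* op1;
    Node* op2;
};

enum WalkResult { WALK_CONTINUE, WALK_SKIP_SUBTREES };

// Context handed to every visit, mirroring VNAssertionPropVisitorInfo:
// the same compiler/block/statement triple is reused for the whole statement.
struct VisitorContext
{
    void* compiler;
    void* block;
    void* stmt;
};

template <typename Visit>
void WalkTreePre(Node* node, Visit visit, VisitorContext* ctx)
{
    if (node == nullptr)
        return;

    // Pre-order: visit the node first; a successful constant prop returns
    // WALK_SKIP_SUBTREES so the (already replaced) operands are not revisited.
    if (visit(node, ctx) == WALK_SKIP_SUBTREES)
        return;

    WalkTreePre(node->op1, visit, ctx);
    WalkTreePre(node->op2, visit, ctx);
}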
optAssertionPropagatedCurrentStmt = false; - for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) - { - GenTreePtr newTree = optAssertionProp(0, tree, stmt); - if (newTree) - { - tree = newTree; - } - } - if (optAssertionPropagatedCurrentStmt) + VNAssertionPropVisitorInfo data(this, block, stmt); + fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, Compiler::optVNAssertionPropCurStmtVisitor, &data); + + if (optAssertionPropagatedCurrentStmt) { fgMorphBlockStmt(block, stmt DEBUGARG("optVNAssertionPropCurStmt")); gtSetStmtInfo(stmt); @@ -4261,11 +4692,8 @@ void Compiler::optAssertionPropMain() #endif // Allocate the bits for the predicate sensitive dataflow analysis - EXPSET_TP* jumpDestGen = fgAllocateTypeForEachBlk<EXPSET_TP>(CMK_AssertionProp); - optComputeAssertionGen(jumpDestGen); - - bbJtrueAssertionOut = fgAllocateTypeForEachBlk<EXPSET_TP>(CMK_AssertionProp); - optInitAssertionDataflowFlags(bbJtrueAssertionOut, jumpDestGen); + bbJtrueAssertionOut = optInitAssertionDataflowFlags(); + ASSERT_TP* jumpDestGen = optComputeAssertionGen(); // Modified dataflow algorithm for available expressions. DataFlow flow(this); @@ -4275,7 +4703,7 @@ void Compiler::optAssertionPropMain() for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { // Compute any implied non-Null assertions for block->bbAssertionIn - block->bbAssertionIn |= optImpliedByTypeOfAssertions(block->bbAssertionIn); + optImpliedByTypeOfAssertions(block->bbAssertionIn); } @@ -4286,12 +4714,12 @@ void Compiler::optAssertionPropMain() for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { printf("\nBB%02u", block->bbNum); - printf(" valueIn = %s", genES2str(block->bbAssertionIn)); - printf(" valueOut = %s", genES2str(block->bbAssertionOut)); + printf(" valueIn = %s", BitVecOps::ToString(apTraits, block->bbAssertionIn)); + printf(" valueOut = %s", BitVecOps::ToString(apTraits, block->bbAssertionOut)); if (block->bbJumpKind == BBJ_COND) { printf(" => BB%02u", block->bbJumpDest->bbNum); - printf(" valueOut= %s", genES2str(bbJtrueAssertionOut[block->bbNum])); + printf(" valueOut= %s", BitVecOps::ToString(apTraits, bbJtrueAssertionOut[block->bbNum])); } } printf("\n"); @@ -4301,11 +4729,7 @@ void Compiler::optAssertionPropMain() // Perform assertion propagation (and constant folding) for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { - EXPSET_TP assertions = block->bbAssertionIn; -#ifdef DEBUG - if (verbose) - printf("Assertions at start of BB%02u => %016I64X\n", block->bbNum, assertions); -#endif + ASSERT_TP assertions = BitVecOps::MakeCopy(apTraits, block->bbAssertionIn); // TODO-Review: EH successor/predecessor iteration seems broken. // SELF_HOST_TESTS_ARM\jit\Directed\ExcepFilters\fault\fault.exe @@ -4344,8 +4768,11 @@ void Compiler::optAssertionPropMain() // and thus we must morph, set order, re-link for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) { - GenTreePtr newTree = optAssertionProp(assertions, tree, stmt); + JITDUMP("Propagating %s assertions for BB%02d, stmt %08X, tree %08X, tree -> %d\n", + BitVecOps::ToString(apTraits, assertions), + block->bbNum, dspPtr(stmt), dspPtr(tree), tree->GetAssertion()); + GenTreePtr newTree = optAssertionProp(assertions, tree, stmt); if (newTree) { assert(optAssertionPropagatedCurrentStmt == true); @@ -4358,18 +4785,10 @@ void Compiler::optAssertionPropMain() // If this tree makes an assertion - make it available. 
if (tree->HasAssertion()) { - assertions |= optGetAssertionBit(tree->GetAssertion()); + BitVecOps::AddElemD(apTraits, assertions, tree->GetAssertion() - 1); // Also include any implied assertions for the tree node. - assertions |= optImpliedAssertions(tree->GetAssertion(), assertions); -#ifdef DEBUG - if (verbose) - { - printf(" + tree "); - printTreeID(tree); - printf(", index=#%02u => %016I64X\n", tree->GetAssertion(), assertions); - } -#endif + optImpliedAssertions((AssertionIndex)tree->GetAssertion(), assertions); } } @@ -4414,5 +4833,3 @@ void Compiler::optAssertionPropMain() lvaSortAgain = true; } } - - diff --git a/src/jit/block.h b/src/jit/block.h index cef8133190..e8fd0e9854 100644 --- a/src/jit/block.h +++ b/src/jit/block.h @@ -27,7 +27,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "blockset.h" #include "jitstd.h" - +#include "bitvec.h" #include "simplerhash.h" /*****************************************************************************/ @@ -42,6 +42,11 @@ typedef unsigned int EXPSET_TP; #define EXPSET_ALL ((EXPSET_TP)0-1) +typedef BitVec ASSERT_TP; +typedef BitVec_ValArg_T ASSERT_VALARG_TP; +typedef BitVec_ValRet_T ASSERT_VALRET_TP; + + /***************************************************************************** * * Each basic block ends with a jump which is described as a value @@ -326,11 +331,10 @@ struct BasicBlock // call, or, in some cases, because the BB occurs in a loop, and // we've determined that all paths in the loop body leading to BB // include a call. - -#define BBF_UNUSED1 0x00100000 // unused -#define BBF_HAS_INDX 0x00200000 // BB contains simple index expressions. TODO: This appears to be set, but never used. +#define BBF_HAS_VTABREF 0x00100000 // BB contains reference of vtable +#define BBF_HAS_INDX 0x00200000 // BB contains simple index expressions on a array local var. #define BBF_HAS_NEWARRAY 0x00400000 // BB contains 'new' of an array -#define BBF_HAS_NEWOBJ 0x00800000 // BB contains 'new' of an object type. TODO: This appears to be set, but never used. +#define BBF_HAS_NEWOBJ 0x00800000 // BB contains 'new' of an object type. #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) #define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during non-exceptional flow. 
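One detail worth calling out about the new ASSERT_TP typedefs and the GetAssertion() - 1 adjustments above: assertion indices are 1-based (0 is the "no assertion" sentinel), while the bit vector stores them 0-based. A small standalone sketch of that convention, using std::bitset as a stand-in for BitVec:

#include <bitset>

using AssertSet = std::bitset<64>;   // illustrative stand-in for ASSERT_TP

// Assertion index 0 means "no assertion"; index i is stored at bit (i - 1),
// matching BitVecOps::AddElemD(apTraits, set, index - 1) in the code above.
inline void AddAssertion(AssertSet& set, unsigned assertionIndex)
{
    if (assertionIndex != 0)
    {
        set.set(assertionIndex - 1);
    }
}

inline bool HasAssertion(const AssertSet& set, unsigned assertionIndex)
{
    return (assertionIndex != 0) && set.test(assertionIndex - 1);
}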
@@ -814,16 +818,16 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh union { - EXPSET_TP bbCseGen; // CSEs computed by block + EXPSET_TP bbCseGen; // CSEs computed by block #if ASSERTION_PROP - EXPSET_TP bbAssertionGen; // value assignments computed by block + ASSERT_TP bbAssertionGen; // value assignments computed by block #endif }; union { #if ASSERTION_PROP - EXPSET_TP bbAssertionKill; // value assignments killed by block + ASSERT_TP bbAssertionKill; // value assignments killed by block #endif }; @@ -831,7 +835,7 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh { EXPSET_TP bbCseIn; // CSEs available on entry #if ASSERTION_PROP - EXPSET_TP bbAssertionIn; // value assignments available on entry + ASSERT_TP bbAssertionIn; // value assignments available on entry #endif }; @@ -839,7 +843,7 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh { EXPSET_TP bbCseOut; // CSEs available on exit #if ASSERTION_PROP - EXPSET_TP bbAssertionOut; // value assignments available on exit + ASSERT_TP bbAssertionOut; // value assignments available on exit #endif }; @@ -936,9 +940,15 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh bool endsWithJmpMethod(Compiler *comp); +#if FEATURE_FASTTAILCALL + bool endsWithTailCall(Compiler* comp, bool fastTailCallsOnly, bool tailCallsConvertibleToLoopOnly, GenTree** tailCall); + bool endsWithTailCallOrJmp(Compiler *comp, bool fastTailCallsOnly = false); + bool endsWithTailCallConvertibleToLoop(Compiler *comp, GenTree** tailCall); +#endif // FEATURE_FASTTAILCALL + #if JIT_FEATURE_SSA_SKIP_DEFS // Returns the first statement in the statement list of "this" that is // not an SSA definition (a lcl = phi(...) assignment). diff --git a/src/jit/codegen.h b/src/jit/codegen.h index 9e7fd3ef9c..79ecc295d7 100644 --- a/src/jit/codegen.h +++ b/src/jit/codegen.h @@ -102,6 +102,10 @@ private: // branch to on compare condition being true. 'false' label corresponds to the target to // branch to on condition being false. static void genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2]); +#if !defined(_TARGET_64BIT_) + static void genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2]); + static void genJumpKindsForTreeLongLo(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2]); +#endif //!defined(_TARGET_64BIT_) #endif // _TARGET_XARCH_ static bool genShouldRoundFP(); @@ -398,13 +402,8 @@ protected: FuncletFrameInfoDsc genFuncletInfo; -#elif defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87 +#elif defined(_TARGET_AMD64_) - // Save/Restore callee saved float regs to stack - void genPreserveCalleeSavedFltRegs(unsigned lclFrameSize); - void genRestoreCalleeSavedFltRegs(unsigned lclFrameSize); - -#ifdef _TARGET_AMD64_ // A set of information that is used by funclet prolog and epilog generation. It is collected once, before // funclet prologs and epilogs are generated, and used by all funclet prologs and epilogs, which must all be the same. 
struct FuncletFrameInfoDsc @@ -415,8 +414,15 @@ protected: }; FuncletFrameInfoDsc genFuncletInfo; + #endif // _TARGET_AMD64_ +#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87 + + // Save/Restore callee saved float regs to stack + void genPreserveCalleeSavedFltRegs(unsigned lclFrameSize); + void genRestoreCalleeSavedFltRegs(unsigned lclFrameSize); + #endif // _TARGET_XARCH_ && FEATURE_STACK_FP_X87 #if !FEATURE_STACK_FP_X87 diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index b3dc2d2534..645d553d20 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -301,7 +301,7 @@ void CodeGen::genCodeForBBlist() /* Figure out which registers hold variables on entry to this block */ - regSet.rsMaskVars = RBM_NONE; + regSet.ClearMaskVars(); gcInfo.gcRegGCrefSetCur = RBM_NONE; gcInfo.gcRegByrefSetCur = RBM_NONE; @@ -341,26 +341,6 @@ void CodeGen::genCodeForBBlist() } } -#ifdef DEBUG - if (compiler->verbose) - { - printf("\t\t\t\t\t\t\tLive regs: "); - if (regSet.rsMaskVars == newLiveRegSet) - { - printf("(unchanged) "); - } - else - { - printRegMaskInt(regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); - printf(" => "); - } - printRegMaskInt(newLiveRegSet); - compiler->getEmitter()->emitDispRegSet(newLiveRegSet); - printf("\n"); - } -#endif // DEBUG - regSet.rsMaskVars = newLiveRegSet; gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUG_ARG(true)); gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUG_ARG(true)); @@ -1377,9 +1357,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genProduceReg(treeNode); break; - case GT_MATH: + case GT_INTRINSIC: { - NYI("GT_MATH"); + NYI("GT_INTRINSIC"); } genProduceReg(treeNode); break; @@ -1838,7 +1818,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree) } #endif // DEBUG - regSet.rsMaskVars |= genGetRegMask(varDsc); + regSet.AddMaskVars(genGetRegMask(varDsc)); } else { diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 952a7d024e..2922275493 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -872,7 +872,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) else { // This is a non-filter funclet - getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta); + getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta); regTracker.rsTrackRegTrash(REG_R3); getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta); } @@ -1133,20 +1133,10 @@ void CodeGen::genSpillVar(GenTreePtr tree) instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum)); - if (varTypeIsMultiReg(tree)) - { - assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair)); - assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair)); - regNumber regLo = genRegPairLo(tree->gtRegPair); - regNumber regHi = genRegPairHi(tree->gtRegPair); - inst_TT_RV(storeIns, tree, regLo); - inst_TT_RV(storeIns, tree, regHi, 4); - } - else - { - assert(varDsc->lvRegNum == tree->gtRegNum); - inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size); - } + + assert(varDsc->lvRegNum == tree->gtRegNum); + inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size); + tree->gtFlags |= GTF_REG_VAL; if (restoreRegVar) @@ -1244,7 +1234,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) { // Ngen case - GS cookie constant needs to be accessed through an indirection. 
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0); + getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0); } // Load this method's GS value from the stack frame getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); @@ -1382,7 +1372,7 @@ void CodeGen::genCodeForBBlist() /* Figure out which registers hold variables on entry to this block */ - regSet.rsMaskVars = RBM_NONE; + regSet.ClearMaskVars(); gcInfo.gcRegGCrefSetCur = RBM_NONE; gcInfo.gcRegByrefSetCur = RBM_NONE; @@ -1439,23 +1429,11 @@ void CodeGen::genCodeForBBlist() } } + regSet.rsMaskVars = newLiveRegSet; + #ifdef DEBUG if (compiler->verbose) { - printf("\t\t\t\t\t\t\tLive regs: "); - if (regSet.rsMaskVars == newLiveRegSet) - { - printf("(unchanged) "); - } - else - { - printRegMaskInt(regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); - printf(" => "); - } - printRegMaskInt(newLiveRegSet); - compiler->getEmitter()->emitDispRegSet(newLiveRegSet); - printf("\n"); if (!VarSetOps::IsEmpty(compiler, addedGCVars)) { printf("\t\t\t\t\t\t\tAdded GCVars: "); @@ -1471,7 +1449,6 @@ void CodeGen::genCodeForBBlist() } #endif // DEBUG - regSet.rsMaskVars = newLiveRegSet; gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUG_ARG(true)); gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUG_ARG(true)); @@ -2025,7 +2002,9 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, if (EA_IS_RELOC(size)) { - NYI("Reloc constant"); + // Emit a data section constant for a relocatable integer constant. + CORINFO_FIELD_HANDLE hnd = getEmitter()->emitLiteralConst(imm); + getEmitter()->emitIns_R_C(INS_ldr, size, reg, hnd, 0); } else if (imm == 0) { @@ -2440,32 +2419,6 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // The per-case functions call genProduceReg() break; - case GT_LCL_VAR: - { - // lcl_vars are not defs - assert((treeNode->gtFlags & GTF_VAR_DEF) == 0); - - GenTreeLclVarCommon *lcl = treeNode->AsLclVarCommon(); - bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate(); - - if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) - { - assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); - } - - // If this is a register candidate that has been spilled, genConsumeReg() will - // reload it at the point of use. Otherwise, if it's not in a register, we load it here. - - if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) - { - assert(!isRegCandidate); - emit->emitIns_R_S(ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), - emitTypeSize(treeNode), targetReg, lcl->gtLclNum, 0); - genProduceReg(treeNode); - } - } - break; - case GT_LCL_FLD_ADDR: case GT_LCL_VAR_ADDR: // Address of a local var. This by itself should never be allocated a register. @@ -2473,79 +2426,159 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // a temp and that would be allocated a register. 
noway_assert(targetType == TYP_BYREF); noway_assert(!treeNode->InReg()); - + inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF); genProduceReg(treeNode); break; case GT_LCL_FLD: { + GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); + assert(varNode->gtLclNum < compiler->lvaCount); + unsigned varNum = varNode->gtLclNum; + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + if (targetType == TYP_STRUCT) + { + NYI("GT_LCL_FLD with TYP_STRUCT"); + } noway_assert(targetType != TYP_STRUCT); noway_assert(targetReg != REG_NA); - unsigned offs = treeNode->gtLclFld.gtLclOffs; - unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; - assert(varNum < compiler->lvaCount); + unsigned offset = treeNode->gtLclFld.gtLclOffs; - emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), EA_8BYTE, targetReg, varNum, offs); + if (varTypeIsFloating(targetType)) + { + if (treeNode->InReg()) + { + NYI("GT_LCL_FLD with register to register Floating point move"); + } + else + { + emit->emitIns_R_S(ins_Load(targetType), emitTypeSize(targetType), targetReg, varNum, offset); + } + } + else + { + emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), EA_8BYTE, targetReg, varNum, offset); + } genProduceReg(treeNode); } break; - case GT_STORE_LCL_FLD: + case GT_LCL_VAR: { - NYI_IF(varTypeIsFloating(targetType), "Code generation for FP field assignment"); + GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - noway_assert(targetType != TYP_STRUCT); - noway_assert(!treeNode->InReg()); + unsigned varNum = varNode->gtLclNum; assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + bool isRegCandidate = varDsc->lvIsRegCandidate(); - unsigned offs = treeNode->gtLclFld.gtLclOffs; - unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; - assert(varNum < compiler->lvaCount); + // lcl_vars are not defs + assert((treeNode->gtFlags & GTF_VAR_DEF) == 0); - GenTreePtr op1 = treeNode->gtOp.gtOp1; - genConsumeRegs(op1); + if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) + { + assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); + } + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. 
+ + if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) + { + assert(!isRegCandidate); + if (targetType == TYP_STRUCT) + { + // At this point any TYP_STRUCT LclVar must be a two register argument + assert(varDsc->lvSize() == 16); + emit->emitIns_R_S(ins_Load(TYP_I_IMPL), emitTypeSize(TYP_I_IMPL), targetReg, varNum, 0); + emit->emitIns_R_S(ins_Load(TYP_I_IMPL), emitTypeSize(TYP_I_IMPL), REG_NEXT(targetReg), varNum, TARGET_POINTER_SIZE); + } + else // targetType is a normal scalar type and not a TYP_STRUCT + { + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); - emit->emitIns_R_S(ins_Store(targetType), emitTypeSize(targetType), op1->gtRegNum, varNum, offs); + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + emit->emitIns_R_S(ins, attr, targetReg, varNum, 0); + } + genProduceReg(treeNode); + } } break; + case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: - { + { + if (targetType == TYP_STRUCT) + { + NYI("GT_STORE_LCL_VAR/FLD with TYP_STRUCT"); + } noway_assert(targetType != TYP_STRUCT); - unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); + + unsigned varNum = varNode->gtLclNum; assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + unsigned offset = 0; + + if (treeNode->gtOper == GT_STORE_LCL_FLD) + { + // record the offset, only used with GT_STORE_LCL_FLD + offset = treeNode->gtLclFld.gtLclOffs; + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(!treeNode->InReg()); + noway_assert(targetReg == REG_NA); + } // Ensure that lclVar nodes are typed correctly. assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); - GenTreePtr op1 = treeNode->gtOp.gtOp1; - genConsumeRegs(op1); - if (targetReg == REG_NA) + GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal(); + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) { - // stack store - emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode); - varDsc->lvRegNum = REG_STK; + assert(data->IsZero()); + dataReg = REG_ZR; } - else // store into register (i.e move into register) + else { - if (op1->isContained()) + assert(!data->isContained()); + genConsumeReg(data); + dataReg = data->gtRegNum; + } + assert(dataReg != REG_NA); + + if (targetReg == REG_NA) // store into stack based LclVar + { + // Only true gtLclVar subclass nodes currently have a gtLclILoffs instance field + // + if(treeNode->gtOper != GT_STORE_LCL_FLD) { - // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register - // must be a constant. However, in the future we might want to support a contained memory op. - // This is a bit tricky because we have to decide it's contained before register allocation, - // and this would be a case where, once that's done, we need to mark that node as always - // requiring a register - which we always assume now anyway, but once we "optimize" that - // we'll have to take cases like this into account. 
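For the TYP_STRUCT path above, a standalone sketch of what the pair of loads amounts to (illustrative helper, assuming a 16-byte local passed in two consecutive registers; not JIT code):

#include <cstdint>
#include <cstring>

// A 16-byte struct local is reloaded as two pointer-sized loads: the first
// register from offset 0, the next register from offset TARGET_POINTER_SIZE (8).
inline void LoadTwoRegStructLocal(const void* frameSlot, uint64_t& reg0, uint64_t& reg1)
{
    std::memcpy(&reg0, frameSlot, sizeof(uint64_t));                             // ldr xN,   [fp, #offs]
    std::memcpy(&reg1,
                static_cast<const uint8_t*>(frameSlot) + sizeof(uint64_t),
                sizeof(uint64_t));                                               // ldr xN+1, [fp, #offs+8]
}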
- assert((op1->gtRegNum == REG_NA) && op1->OperIsConst()); - genSetRegToConst(targetReg, targetType, op1); + inst_set_SV_var(varNode); } - else if (op1->gtRegNum != targetReg) + + instruction ins = ins_Store(targetType); + emitAttr attr = emitTypeSize(targetType); + + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); + + genUpdateLife(varNode); + + varDsc->lvRegNum = REG_STK; + } + else // store into register (i.e move into register) + { + if (dataReg != targetReg) { - // Setup targetReg when op1 is not a matching register - assert(op1->gtRegNum != REG_NA); - inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); + // Assign into targetReg when dataReg (from op1) is not the same register + inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); } genProduceReg(treeNode); } @@ -2610,7 +2643,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) if (movRequired) { emitAttr movSize = EA_ATTR(genTypeSize(targetType)); - getEmitter()->emitIns_R_R(INS_mov, movSize, retReg, op1->gtRegNum); + emit->emitIns_R_R(INS_mov, movSize, retReg, op1->gtRegNum); } } @@ -2642,10 +2675,14 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_IND: genConsumeAddress(treeNode->AsIndir()->Addr()); - emit->emitInsMov(ins_Load(targetType), emitTypeSize(treeNode), treeNode); + emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir()); genProduceReg(treeNode); break; + case GT_LDOBJ: + genCodeForLdObj(treeNode->AsOp()); + break; + case GT_MULHI: genCodeForMulHi(treeNode->AsOp()); genProduceReg(treeNode); @@ -2659,8 +2696,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) noway_assert(!"Codegen for GT_MOD/GT_UMOD"); break; - case GT_MATH: - genMathIntrinsic(treeNode); + case GT_INTRINSIC: + genIntrinsic(treeNode); break; #ifdef FEATURE_SIMD @@ -2854,7 +2891,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genGCWriteBarrier(treeNode, writeBarrierForm); } - else + else // A normal store, not a WriteBarrier store { bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0); bool dataIsUnary = false; @@ -2865,35 +2902,30 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { genConsumeAddress(addr); } - if (data->isContained() && !data->OperIsLeaf()) - { - dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0); - if (!dataIsUnary) - { - nonRMWsrc = data->gtGetOp1(); - if (nonRMWsrc->isIndir() && Lowering::IndirsAreEquivalent(nonRMWsrc, treeNode)) - { - nonRMWsrc = data->gtGetOp2(); - } - genConsumeRegs(nonRMWsrc); - } - } - else + + if (!data->isContained()) { genConsumeRegs(data); } + if (reverseOps) { genConsumeAddress(addr); } - if (data->isContained() && !data->OperIsLeaf()) + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) { - NYI("RMW?"); + assert(data->IsZero()); + dataReg = REG_ZR; } - else + else // data is not contained, so evaluate it into a register { - emit->emitInsMov(ins_Store(targetType), emitTypeSize(treeNode), treeNode); + assert(!data->isContained()); + dataReg = data->gtRegNum; } + + emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(treeNode), dataReg, treeNode->AsIndir()); } } break; @@ -3024,9 +3056,16 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_PUTARG_REG: + if (targetType == TYP_STRUCT) { - noway_assert(targetType != TYP_STRUCT); + noway_assert((treeNode->gtOp.gtOp1->OperGet() == GT_LDOBJ) || + (treeNode->gtOp.gtOp1->OperGet() == GT_LCL_VAR) ); + // Currently we just expect that gtOp1 should have 
loaded the correct register pair + noway_assert(targetReg == treeNode->gtOp.gtOp1->gtRegNum); + } + else + { // commas show up here commonly, as part of a nullchk operation GenTree *op1 = treeNode->gtOp.gtOp1; // If child node is not already in the register we need, move it @@ -3510,9 +3549,10 @@ CodeGen::genLclHeap(GenTreePtr tree) genSetRegToIcon(regCnt, amount, ((int)amount == amount)? TYP_INT : TYP_LONG); } - loop = genCreateTempLabel(); if (compiler->info.compInitMem) { + BasicBlock* loop = genCreateTempLabel(); + // At this point 'regCnt' is set to the total number of bytes to locAlloc. // Since we have to zero out the allocated memory AND ensure that RSP is always valid // by tickling the pages, we will just push 0's on the stack. @@ -3536,65 +3576,75 @@ CodeGen::genLclHeap(GenTreePtr tree) } else { - //At this point 'regCnt' is set to the total number of bytes to locAlloc. + // At this point 'regCnt' is set to the total number of bytes to locAlloc. + // + // We don't need to zero out the allocated memory. However, we do have + // to tickle the pages to ensure that SP is always valid and is + // in sync with the "stack guard page". Note that in the worst + // case SP is on the last byte of the guard page. Thus you must + // touch SP+0 first not SP+x01000. // - //We don't need to zero out the allocated memory. However, we do have - //to tickle the pages to ensure that ESP is always valid and is - //in sync with the "stack guard page". Note that in the worst - //case ESP is on the last byte of the guard page. Thus you must - //touch ESP+0 first not ESP+x01000. + // Another subtlety is that you don't want SP to be exactly on the + // boundary of the guard page because PUSH is predecrement, thus + // call setup would not touch the guard page but just beyond it // - //Another subtlety is that you don't want ESP to be exactly on the - //boundary of the guard page because PUSH is predecrement, thus - //call setup would not touch the guard page but just beyond it + // Note that we go through a few hoops so that SP never points to + // illegal pages at any time during the ticking process // - //Note that we go through a few hoops so that ESP never points to - //illegal pages at any time during the ticking process + // subs regCnt, SP, regCnt // regCnt now holds ultimate SP + // jb Loop // result is smaller than orignial SP (no wrap around) + // mov regCnt, #0 // Overflow, pick lowest possible value // - // neg REGCNT - // add REGCNT, ESP // reg now holds ultimate ESP - // jb loop // result is smaller than orignial ESP (no wrap around) - // xor REGCNT, REGCNT, // Overflow, pick lowest possible number - // loop: - // test ESP, [ESP+0] // tickle the page - // mov REGTMP, ESP - // sub REGTMP, PAGE_SIZE - // mov ESP, REGTMP - // cmp ESP, REGCNT - // jae loop + // Loop: + // ldr wzr, [SP + 0] // tickle the page - read from the page + // sub regTmp, SP, PAGE_SIZE // decrement SP by PAGE_SIZE + // cmp regTmp, regCnt + // jb Done + // mov SP, regTmp + // j Loop // - // mov ESP, REG - // end: - inst_RV(INS_NEG, regCnt, TYP_I_IMPL); - inst_RV_RV(INS_adds, regCnt, REG_SPBASE, TYP_I_IMPL); - inst_JMP(EJ_jb, loop); + // Done: + // mov SP, regCnt + // + + // Setup the regTmp + assert(tmpRegsMask != RBM_NONE); + assert(genCountBits(tmpRegsMask) == 1); + regNumber regTmp = genRegNumFromMask(tmpRegsMask); + + BasicBlock* loop = genCreateTempLabel(); + BasicBlock* done = genCreateTempLabel(); + // subs regCnt, SP, regCnt // regCnt now holds ultimate SP + getEmitter()->emitIns_R_R_R(INS_subs, EA_PTRSIZE, 
regCnt, REG_SPBASE, regCnt); + + inst_JMP(EJ_jno, loop); // branch if the V flag is not set + + // Overflow, set regCnt to lowest possible value instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); genDefineTempLabel(loop); - // Tickle the decremented value, and move back to ESP, - // note that it has to be done BEFORE the update of ESP since - // ESP might already be on the guard page. It is OK to leave - // the final value of ESP on the guard page - getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); + // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0); - // This is a harmless workaround to avoid the emitter trying to track the - // decrement of the ESP - we do the subtraction in another reg instead - // of adjusting ESP directly. - assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) == 1); - regNumber regTmp = genRegNumFromMask(tmpRegsMask); + // decrement SP by PAGE_SIZE + getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, CORINFO_PAGE_SIZE); - inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); - inst_RV_IV(INS_sub, regTmp, CORINFO_PAGE_SIZE, EA_PTRSIZE); - inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); + getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt); + inst_JMP(EJ_jb, done); - inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL); - inst_JMP(EJ_jae, loop); + // Update SP to be at the next page of stack that we will tickle + getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); - // Move the final value to ESP - inst_RV_RV(INS_mov, REG_SPBASE, regCnt); + // Jump to loop and tickle new stack address + inst_JMP(EJ_jmp, loop); + + // Done with stack tickle loop + genDefineTempLabel(done); + + // Now just move the final value to SP + getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); } ALLOC_DONE: @@ -3730,7 +3780,6 @@ void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode) // offset: distance from the base from which to load void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset) { -#if 0 emitter *emit = getEmitter(); if (base->OperIsLocalAddr()) @@ -3741,11 +3790,8 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst } else { - emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset); + emit->emitIns_R_R_I(ins, size, dst, base->gtRegNum, offset); } -#else // !0 - NYI("genCodeForLoadOffset"); -#endif // !0 } // Generate code for a store to some address + offset @@ -4229,42 +4275,40 @@ CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsi void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) { -#if 0 + emitter * emit = getEmitter(); GenTreePtr arrObj = arrIndex->ArrObj(); GenTreePtr indexNode = arrIndex->IndexExpr(); + regNumber arrReg = genConsumeReg(arrObj); + regNumber indexReg = genConsumeReg(indexNode); + regNumber tgtReg = arrIndex->gtRegNum; noway_assert(tgtReg != REG_NA); - regNumber arrReg = genConsumeReg(arrObj); - regNumber indexReg = genConsumeReg(indexNode); - regNumber tgtReg = arrIndex->gtRegNum; + // We will use a temp register to load the lower bound and dimension size values + // + regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set + tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask' - unsigned dim = arrIndex->gtCurrDim; - unsigned rank = arrIndex->gtArrRank; - 
var_types elemType = arrIndex->gtArrElemType; + regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask + regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask + noway_assert(tmpReg != REG_NA); - noway_assert(tgtReg != REG_NA); + assert(tgtReg != tmpReg); - // Subtract the lower bound for this dimension. - // TODO-ARM64-CQ: make this contained if it's an immediate that fits. - if (tgtReg != indexReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet()); - } - getEmitter()->emitIns_R_AR(INS_sub, - emitActualTypeSize(TYP_INT), - tgtReg, - arrReg, - genOffsetOfMDArrayLowerBound(elemType, rank, dim)); - getEmitter()->emitIns_R_AR(INS_cmp, - emitActualTypeSize(TYP_INT), - tgtReg, - arrReg, - genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); - genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL); + unsigned dim = arrIndex->gtCurrDim; + unsigned rank = arrIndex->gtArrRank; + var_types elemType = arrIndex->gtArrElemType; + unsigned offset; + offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim); + emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load + emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg); + + offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); + emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load + emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg); + + genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL); + genProduceReg(arrIndex); -#else // !0 - NYI("genCodeForArrIndex"); -#endif // !0 } //------------------------------------------------------------------------ @@ -4286,51 +4330,31 @@ CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) { -#if 0 GenTreePtr offsetNode = arrOffset->gtOffset; GenTreePtr indexNode = arrOffset->gtIndex; - GenTreePtr arrObj = arrOffset->gtArrObj; - - regNumber tgtReg = arrOffset->gtRegNum; + regNumber tgtReg = arrOffset->gtRegNum; noway_assert(tgtReg != REG_NA); - unsigned dim = arrOffset->gtCurrDim; - unsigned rank = arrOffset->gtArrRank; - var_types elemType = arrOffset->gtArrElemType; - - // We will use a temp register for the offset*scale+effectiveIndex computation. - regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - if (!offsetNode->IsZero()) { - // Evaluate tgtReg = offsetReg*dim_size + indexReg. - // tmpReg is used to load dim_size and the result of the multiplication. - // Note that dim_size will never be negative. 
- regNumber offsetReg = genConsumeReg(offsetNode); - regNumber indexReg = genConsumeReg(indexNode); - regNumber arrReg = genConsumeReg(arrObj); - - getEmitter()->emitIns_R_AR(INS_mov, - emitActualTypeSize(TYP_INT), - tmpReg, - arrReg, - genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); - inst_RV_RV(INS_imul, tmpReg, offsetReg); + emitter * emit = getEmitter(); + GenTreePtr arrObj = arrOffset->gtArrObj; + regNumber arrReg = genConsumeReg(arrObj); noway_assert(arrReg != REG_NA); + regNumber offsetReg = genConsumeReg(offsetNode); noway_assert(offsetReg != REG_NA); + regNumber indexReg = genConsumeReg(indexNode); noway_assert(indexReg != REG_NA); + regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; + regNumber tmpReg = genRegNumFromMask(tmpRegMask); noway_assert(tmpReg != REG_NA); + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + var_types elemType = arrOffset->gtArrElemType; + unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); + + // Load tmpReg with the dimension size + emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - if (tmpReg == tgtReg) - { - inst_RV_RV(INS_add, tmpReg, indexReg); - } - else - { - if (indexReg != tgtReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL); - } - inst_RV_RV(INS_add, tgtReg, tmpReg); - } + // Evaluate tgtReg = offsetReg*dim_size + indexReg. + emit->emitIns_R_R_R_R(INS_madd, EA_4BYTE, tgtReg, tmpReg, offsetReg, indexReg); } else { @@ -4341,9 +4365,6 @@ CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) } } genProduceReg(arrOffset); -#else // !0 - NYI("genCodeForArrOffset"); -#endif // !0 } // make a temporary indir we can feed to pattern matching routines @@ -4427,6 +4448,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) case GT_LSH: ins = INS_lsl; break; + case GT_NEG: + ins = INS_neg; + break; case GT_NOT: ins = INS_mvn; break; @@ -4491,7 +4515,10 @@ void CodeGen::genCodeForShift(GenTreePtr operand, } else { - getEmitter()->emitIns_R_R_I(ins, size, parent->gtRegNum, operand->gtRegNum, shiftBy->gtIntCon.gtIconVal); + unsigned immWidth = emitter::getBitWidth(size); // immWidth will be set to 32 or 64 + ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth-1); + + getEmitter()->emitIns_R_R_I(ins, size, parent->gtRegNum, operand->gtRegNum, shiftByImm); } genProduceReg(parent); @@ -4558,7 +4585,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree) } #endif // DEBUG - regSet.rsMaskVars |= genGetRegMask(varDsc); + regSet.AddMaskVars(genGetRegMask(varDsc)); } } else @@ -5267,7 +5294,7 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. // Therefore manually update life of varDsc->lvRegNum. regMaskTP tempMask = genRegMask(varDsc->lvRegNum); - regSet.rsMaskVars &= ~tempMask; + regSet.RemoveMaskVars(tempMask); gcInfo.gcMarkRegSetNpt(tempMask); if (varDsc->lvTracked) { @@ -5318,7 +5345,7 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). 
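Putting the two multi-dimensional array pieces above together, the emitted sequence is roughly the following standalone sketch (illustrative names; the bounds check and the multiply-add mirror genCodeForArrIndex and genCodeForArrOffset):

#include <cstdint>
#include <stdexcept>

// GT_ARR_INDEX: effective index = index - lowerBound, with an unsigned
// compare against the dimension size (cmp + jae to the range-check throw block).
inline uint32_t EffectiveIndex(int32_t index, int32_t lowerBound, uint32_t dimSize)
{
    uint32_t effIndex = static_cast<uint32_t>(index - lowerBound);  // sub tgt, index, lowerBound
    if (effIndex >= dimSize)                                        // cmp tgt, dimSize; branch if unsigned >=
    {
        throw std::out_of_range("IndexOutOfRangeException");
    }
    return effIndex;
}

// GT_ARR_OFFSET: fold this dimension into the running offset with a single
// multiply-add, i.e. the madd of dimSize, offset, and index emitted above.
inline uint64_t FoldDimension(uint64_t runningOffset, uint32_t dimSize, uint32_t effIndex)
{
    return runningOffset * dimSize + effIndex;
}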
- regSet.rsMaskVars |= genRegMask(argReg); + regSet.AddMaskVars(genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); if (varDsc->lvTracked) { @@ -5399,6 +5426,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode *lea) genConsumeOperands(lea); emitter *emit = getEmitter(); emitAttr size = emitTypeSize(lea); + unsigned offset = lea->gtOffset; // In ARM64 we can only load addresses of the form: // @@ -5416,6 +5444,10 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode *lea) // produce LEAs that are a 1:1 relationship to the ARM64 architecture. if (lea->Base() && lea->Index()) { + GenTree* memBase = lea->Base(); + GenTree* index = lea->Index(); + unsigned offset = lea->gtOffset; + DWORD lsl; assert(isPow2(lea->gtScale)); @@ -5423,30 +5455,84 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode *lea) assert(lsl <= 4); - // First, generate code to load rd = [base + index*scale] - if (lsl > 0) + if (offset != 0) { - emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->Index()->gtRegNum, lsl, INS_OPTS_LSL); + regMaskTP tmpRegMask = lea->gtRsvdRegs; + regNumber tmpReg = genRegNumFromMask(tmpRegMask); + noway_assert(tmpReg != REG_NA); + + if (emitter::emitIns_valid_imm_for_add(offset, EA_8BYTE)) + { + if (lsl > 0) + { + // Generate code to set tmpReg = base + index*scale + emit->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL); + } + else // no scale + { + // Generate code to set tmpReg = base + index + emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum); + } + + // Then compute target reg from [tmpReg + offset] + emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, offset);; + } + else // large offset + { + // First load/store tmpReg with the large offset constant + instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // Then add the base register + // rd = rd + base + emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum); + + noway_assert(tmpReg != index->gtRegNum); + + // Then compute target reg from [tmpReg + index*scale] + emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, index->gtRegNum, lsl, INS_OPTS_LSL); + } } else { - emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->Index()->gtRegNum); - } - // If the offset is not zero, then compute rd = [rd + offset] - if (lea->gtOffset != 0) - { - emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, lea->gtRegNum, (int) lea->gtOffset); + if (lsl > 0) + { + // Then compute target reg from [base + index*scale] + emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL); + } + else + { + // Then compute target reg from [base + index] + emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum); + } } } else if (lea->Base()) { - if (lea->gtOffset != 0) + GenTree* memBase = lea->Base(); + + if (emitter::emitIns_valid_imm_for_add(offset, EA_8BYTE)) { - emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, (int) lea->gtOffset); + if (offset != 0) + { + // Then compute target reg from [memBase + offset] + emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, offset); + } + else // offset is zero + { + emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, memBase->gtRegNum); + } } else { - emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, lea->Base()->gtRegNum); + // We require a tmpReg to hold the offset + regMaskTP tmpRegMask = lea->gtRsvdRegs; + regNumber tmpReg = 
genRegNumFromMask(tmpRegMask); + noway_assert(tmpReg != REG_NA); + + // First load tmpReg with the large offset constant + instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + + // Then compute target reg from [memBase + tmpReg] + emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, tmpReg); } } else if (lea->Index()) @@ -5505,104 +5591,29 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) emitAttr movSize = emitActualTypeSize(dstType); bool movRequired = false; - bool isUnsignedDst = varTypeIsUnsigned(dstType); - bool isUnsignedSrc = varTypeIsUnsigned(srcType); - - bool requiresOverflowCheck = false; - regNumber targetReg = treeNode->gtRegNum; regNumber sourceReg = castOp->gtRegNum; + // For Long to Int conversion we will have a reserved integer register to hold the immediate mask + regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs); + assert(genIsValidIntReg(targetReg)); assert(genIsValidIntReg(sourceReg)); instruction ins = INS_invalid; - // If necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set. - if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0) - { - srcType = genUnsignedType(srcType); - isUnsignedSrc = true; - } - - if (treeNode->gtOverflow() && (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG))) - { - requiresOverflowCheck = true; - } - genConsumeReg(castOp); + Lowering::CastInfo castInfo; - if (requiresOverflowCheck) - { - emitAttr cmpSize = EA_ATTR(genTypeSize(srcType)); - ssize_t typeMin = 0; - ssize_t typeMax = 0; - ssize_t typeMask = 0; - bool signCheckOnly = false; - - /* Do we need to compare the value, or just check masks */ + // Get information about the cast. + Lowering::getCastDescription(treeNode, &castInfo); - switch (dstType) - { - case TYP_BYTE: - typeMask = ssize_t((int)0xFFFFFF80); - typeMin = SCHAR_MIN; - typeMax = SCHAR_MAX; - break; - - case TYP_UBYTE: - typeMask = ssize_t((int)0xFFFFFF00L); - break; - - case TYP_SHORT: - typeMask = ssize_t((int)0xFFFF8000); - typeMin = SHRT_MIN; - break; - - case TYP_CHAR: - typeMask = ssize_t((int)0xFFFF0000L); - break; - - case TYP_INT: - if (srcType == TYP_UINT) - { - signCheckOnly = true; - } - else - { - typeMask = 0xFFFFFFFF80000000LL; - typeMin = INT_MIN; - typeMax = INT_MAX; - } - break; - - case TYP_UINT: - if (srcType == TYP_INT) - { - signCheckOnly = true; - } - else - { - typeMask = 0xFFFFFFFF00000000LL; - } - break; - - case TYP_LONG: - noway_assert(srcType == TYP_ULONG); - signCheckOnly = true; - break; + if (castInfo.requiresOverflowCheck) + { - case TYP_ULONG: - noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT)); - signCheckOnly = true; - break; + emitAttr cmpSize = EA_ATTR(genTypeSize(srcType)); - default: - NO_WAY("Unknown type"); - return; - } - - if (signCheckOnly) + if (castInfo.signCheckOnly) { // We only need to check for a negative value in sourceReg emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0); @@ -5616,34 +5627,52 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) movRequired = true; } } - else + else if (castInfo.unsignedSource || castInfo.unsignedDest) { // When we are converting from/to unsigned, // we only have to check for any bits set in 'typeMask' - if (isUnsignedSrc || isUnsignedDst) + + noway_assert(castInfo.typeMask != 0); + emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + } + else + { + // For a narrowing signed cast + // + // We must check the value is in a 
signed range. + + // Compare with the MAX + + noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0)); + + if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize)) { - noway_assert(typeMask != 0); - emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, typeMask); - genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax); } else { - // For a narrowing signed cast - // - // We must check the value is in a signed range. - - // Compare with the MAX - - noway_assert((typeMin != 0) && (typeMax != 0)); + noway_assert(tmpReg != REG_NA); + instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax); + emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg); + } - emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, typeMax); - genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW); + genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW); - // Compare with the MIN + // Compare with the MIN - emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, typeMin); - genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); + if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize)) + { + emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin); + } + else + { + noway_assert(tmpReg != REG_NA); + instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin); + emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg); } + + genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); } ins = INS_mov; } @@ -5655,23 +5684,39 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) } else { - var_types extendType; + var_types extendType = TYP_UNKNOWN; - if (genTypeSize(srcType) < genTypeSize(dstType)) + // If we need to treat a signed type as unsigned + if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) { - extendType = srcType; - if (srcType == TYP_UINT) - { - movSize = EA_4BYTE; // force a mov EA_4BYTE to zero the upper bits - movRequired = true; - } + extendType = genUnsignedType(srcType); + movSize = emitTypeSize(extendType); + movRequired = true; } - else // (genTypeSize(srcType) > genTypeSize(dstType)) + + else { - extendType = dstType; - if (dstType == TYP_INT) + if (genTypeSize(srcType) < genTypeSize(dstType)) { - movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE + extendType = srcType; + if (srcType == TYP_UINT) + { + // If we are casting from a smaller type to + // a larger type, then we need to make sure the + // higher 4 bytes are zero to gaurentee the correct value. + // Therefore using a mov with EA_4BYTE in place of EA_8BYTE + // will zero the upper bits + movSize = EA_4BYTE; + movRequired = true; + } + } + else // (genTypeSize(srcType) > genTypeSize(dstType)) + { + extendType = dstType; + if (dstType == TYP_INT) + { + movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE + } } } @@ -5718,17 +5763,23 @@ CodeGen::genFloatToFloatCast(GenTreePtr treeNode) var_types dstType = treeNode->CastToType(); var_types srcType = op1->TypeGet(); assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); - assert(srcType != dstType); // Must specify two different types - - insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double - : INS_OPTS_D_TO_S; // convert Double to Single genConsumeOperands(treeNode->AsOp()); // treeNode must be a reg assert(!treeNode->isContained()); - getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption); + if (srcType != dstType) + { + insOpts cvtOption = (srcType == TYP_FLOAT) ? 
INS_OPTS_S_TO_D // convert Single to Double + : INS_OPTS_D_TO_S; // convert Double to Single + + getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption); + } + else if (treeNode->gtRegNum != op1->gtRegNum) // If double to double cast or float to float cast. Emit a move instruction. + { + getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum); + } genProduceReg(treeNode); } @@ -5920,39 +5971,34 @@ CodeGen::genCkfinite(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_CKFINITE); -#if 0 GenTreePtr op1 = treeNode->gtOp.gtOp1; var_types targetType = treeNode->TypeGet(); - int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent. + int expMask = (targetType == TYP_FLOAT) ? 0x7F8 : 0x7FF; // Bit mask to extract exponent. + int shiftAmount = targetType == TYP_FLOAT ? 20 : 52; + + emitter * emit = getEmitter(); // Extract exponent into a register. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); - - inst_RV_RV(INS_mov_xmm2i, genConsumeReg(op1), tmpReg, targetType); - if (targetType == TYP_DOUBLE) - { - // right shift by 32 bits to get to exponent. - inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32); - } + regNumber intReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber fpReg = genConsumeReg(op1); + assert(intReg != REG_NA); + + emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), intReg, fpReg); + emit->emitIns_R_R_I(INS_lsr, emitTypeSize(targetType), intReg, intReg, shiftAmount); // Mask of exponent with all 1's and check if the exponent is all 1's - inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); - inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); + emit->emitIns_R_R_I(INS_and, EA_4BYTE, intReg, intReg, expMask); + emit->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, expMask); // If exponent is all 1's, throw ArithmeticException genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); // if it is a finite value copy it to targetReg - if (treeNode->gtRegNum != op1->gtRegNum) + if (treeNode->gtRegNum != fpReg) { - inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType); + emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), treeNode->gtRegNum, fpReg); } genProduceReg(treeNode); -#else // !0 - NYI("genCkfinite"); -#endif // !0 } int CodeGenInterface::genSPtoFPdelta() @@ -6029,42 +6075,145 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() //--------------------------------------------------------------------- -// genMathIntrinsic - generate code for a given math intrinsic +// genIntrinsic - generate code for a given intrinsic // // Arguments -// treeNode - the GT_MATH node +// treeNode - the GT_INTRINSIC node // // Return value: // None // void -CodeGen::genMathIntrinsic(GenTreePtr treeNode) -{ -#if 0 - // Right now only Sqrt/Abs are treated as math intrinsics. - switch(treeNode->gtMath.gtMathFN) +CodeGen::genIntrinsic(GenTreePtr treeNode) +{ + // Both operand and its result must be of the same floating point type. + GenTreePtr srcNode = treeNode->gtOp.gtOp1; + assert(varTypeIsFloating(srcNode)); + assert(srcNode->TypeGet() == treeNode->TypeGet()); + + // Right now only Abs/Round/Sqrt are treated as math intrinsics. 
+ // + switch(treeNode->gtIntrinsic.gtIntrinsicId) { - case CORINFO_INTRINSIC_Sqrt: - noway_assert(treeNode->TypeGet() == TYP_DOUBLE); - genConsumeOperands(treeNode->AsOp()); - getEmitter()->emitInsBinary(INS_sqrtsd, emitTypeSize(treeNode), treeNode, treeNode->gtOp.gtOp1); - break; + case CORINFO_INTRINSIC_Abs: + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(INS_fabs, emitTypeSize(treeNode), treeNode, srcNode); + break; - case CORINFO_INTRINSIC_Abs: - genSSE2BitwiseOp(treeNode); - break; + case CORINFO_INTRINSIC_Round: + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(INS_frinta, emitTypeSize(treeNode), treeNode, srcNode); + break; + + case CORINFO_INTRINSIC_Sqrt: + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(INS_fsqrt, emitTypeSize(treeNode), treeNode, srcNode); + break; - default: - assert(!"genMathIntrinsic: Unsupported math intrinsic"); - unreached(); + default: + assert(!"genIntrinsic: Unsupported intrinsic"); + unreached(); } genProduceReg(treeNode); -#else // !0 - NYI("genMathIntrinsic"); -#endif // !0 } +//--------------------------------------------------------------------- +// genCodeForLdObj - generate code for a GT_LDOBJ node +// +// Arguments +// treeNode - the GT_LDOBJ node +// +// Return value: +// None +// + +void CodeGen::genCodeForLdObj(GenTreeOp* treeNode) +{ + assert(treeNode->OperGet() == GT_LDOBJ); + + GenTree* addr = treeNode->gtOp.gtOp1; + genConsumeAddress(addr); + + regNumber addrReg = addr->gtRegNum; + regNumber targetReg = treeNode->gtRegNum; + var_types targetType = treeNode->TypeGet(); + emitter * emit = getEmitter(); + + noway_assert(targetType == TYP_STRUCT); + noway_assert(targetReg != REG_NA); + + CORINFO_CLASS_HANDLE ldObjClass = treeNode->gtLdObj.gtClass; + int structSize = compiler->info.compCompHnd->getClassSize(ldObjClass); + noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); + + // For a 16-byte structSize we will use a ldp instruction to load two registers + // ldp x2, x3, [x0] + // + // For a 12-byte structSize we will we will generate two load instructions + // ldr x2, [x0] + // ldr w3, [x0, #8] + // + // When the first instruction has a targetReg that is the same register + // as the source register: addrReg, we set deferLoad to true and + // issue the intructions in the reverse order: + // ldr w3, [x2, #8] + // ldr x2, [x2] + + bool deferLoad = false; + emitAttr deferAttr = EA_PTRSIZE; + int deferOffset = 0; + int remainingSize = structSize; + unsigned structOffset = 0; + + // Use the ldp instruction for a struct that is exactly 16-bytes in size + // ldp x2, x3, [x0] + // + if (remainingSize == 2*TARGET_POINTER_SIZE) + { + remainingSize -= TARGET_POINTER_SIZE; + remainingSize -= TARGET_POINTER_SIZE; + getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, targetReg, REG_NEXT(targetReg), addrReg, structOffset); + } + + while (remainingSize > 0) + { + if (remainingSize >= TARGET_POINTER_SIZE) + { + remainingSize -= TARGET_POINTER_SIZE; + + if ((targetReg != addrReg) || (remainingSize == 0)) + { + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, targetReg, addrReg, structOffset); + } + else + { + deferLoad = true; + deferAttr = EA_PTRSIZE; + deferOffset = structOffset; + } + targetReg = REG_NEXT(targetReg); + structOffset += TARGET_POINTER_SIZE; + } + else // (remainingSize < TARGET_POINTER_SIZE) + { + int loadSize = remainingSize; + noway_assert((loadSize == 4) || (loadSize == 2) || (loadSize == 1)); + remainingSize = 0; + + getEmitter()->emitIns_R_R_I(INS_ldr, emitAttr(loadSize), 
targetReg, addrReg, structOffset); + } + } + + if (deferLoad) + { + targetReg = addrReg; + getEmitter()->emitIns_R_R_I(INS_ldr, deferAttr, targetReg, addrReg, deferOffset); + } + genProduceReg(treeNode); +} + + /***************************************************************************** * * Create and record GC Info for the function. @@ -6149,7 +6298,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, NYI("genEmitHelperCall indirect"); #if 0 assert(pAddr != nullptr); - if (genAddrShouldUsePCRel((size_t)pAddr)) + if (genAddrCanBeEncodedAsPCRelOffset((size_t)pAddr)) { // generate call whose target is specified by PC-relative 32-bit offset. callType = emitter::EC_FUNC_TOKEN_INDIR; diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 68e8c1074e..40dec4a791 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -155,6 +155,9 @@ CodeGen::CodeGen(Compiler * theCompiler) : #ifdef _TARGET_AMD64_ // This will be set before final frame layout. compiler->compVSQuirkStackPaddingNeeded = 0; + + // Set to true if we perform the Quirk that fixes the PPP issue + compiler->compQuirkForPPPflag = false; #endif // _TARGET_AMD64_ #ifdef LEGACY_BACKEND @@ -183,11 +186,13 @@ void CodeGenInterface::genMarkTreeInReg(GenTreePtr tree, regNumber reg) tree->gtFlags |= GTF_REG_VAL; } +#if CPU_LONG_USES_REGPAIR void CodeGenInterface::genMarkTreeInRegPair(GenTreePtr tree, regPairNo regPair) { tree->gtRegPair = regPair; tree->gtFlags |= GTF_REG_VAL; } +#endif #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) @@ -715,12 +720,12 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc * varDsc, // We'd like to be able to assert the following, however if we are walking // through a qmark/colon tree, we may encounter multiple last-use nodes. // assert((regSet.rsMaskVars & regMask) == regMask); - regSet.rsMaskVars &= ~(regMask); + regSet.RemoveMaskVars(regMask); } else { assert((regSet.rsMaskVars & regMask) == 0); - regSet.rsMaskVars |= regMask; + regSet.AddMaskVars(regMask); } } @@ -1485,12 +1490,145 @@ regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call) } #ifdef _TARGET_XARCH_ -// return true if this relocatable address should use IP-rel -bool CodeGenInterface::genAddrShouldUsePCRel(size_t addr) + +#ifdef _TARGET_AMD64_ +// Returns relocation type hint for an addr. +// Note that there are no reloc hints on x86. +// +// Arguments +// addr - data address +// +// Returns +// relocation type hint +// +unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr) { - return compiler->info.compCompHnd->getRelocTypeHint((void *)addr) == IMAGE_REL_BASED_REL32; + return compiler->eeGetRelocTypeHint((void *)addr); +} +#endif //_TARGET_AMD64_ + +// Return true if an absolute indirect data address can be encoded as IP-relative. +// offset. Note that this method should be used only when the caller knows that +// the address is an icon value that VM has given and there is no GenTree node +// representing it. Otherwise, one should always use FitsInAddrBase(). +// +// Arguments +// addr - an absolute indirect data address +// +// Returns +// true if indir data addr could be encoded as IP-relative offset. +// +bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr) +{ +#ifdef _TARGET_AMD64_ + return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32; +#else + // x86: PC-relative addressing is available only for control flow instructions (jmp and call) + return false; +#endif } + +// Return true if an indirect code address can be encoded as IP-relative offset. 
+// Note that this method should be used only when the caller knows that the +// address is an icon value that VM has given and there is no GenTree node +// representing it. Otherwise, one should always use FitsInAddrBase(). +// +// Arguments +// addr - an absolute indirect code address +// +// Returns +// true if indir code addr could be encoded as IP-relative offset. +// +bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr) +{ +#ifdef _TARGET_AMD64_ + return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32; +#else + // x86: PC-relative addressing is available only for control flow instructions (jmp and call) + return true; #endif +} + +// Return true if an indirect code address can be encoded as 32-bit displacement +// relative to zero. Note that this method should be used only when the caller +// knows that the address is an icon value that VM has given and there is no +// GenTree node representing it. Otherwise, one should always use FitsInAddrBase(). +// +// Arguments +// addr - absolute indirect code address +// +// Returns +// true if absolute indir code addr could be encoded as 32-bit displacement relative to zero. +// +bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr) +{ + return GenTreeIntConCommon::FitsInI32((ssize_t)addr); +} + +// Return true if an absolute indirect code address needs a relocation recorded with VM. +// +// Arguments +// addr - an absolute indirect code address +// +// Returns +// true if indir code addr needs a relocation recorded with VM +// +bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr) +{ + // If generating relocatable ngen code, then all code addr should go through relocation + if (compiler->opts.compReloc) + { + return true; + } + + // Else jitting. + +#ifdef _TARGET_AMD64_ + // If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation. + if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr)) + { + return true; + } + + // It could be possible that the code indir addr could be encoded as 32-bit displacement relative + // to zero. But we don't need to emit a relocation in that case. + return false; +#else //_TARGET_X86_ + // On x86 there is need for recording relocations during jitting, + // because all addrs fit within 32-bits. + return false; +#endif //_TARGET_X86_ +} + +// Return true if a direct code address needs to be marked as relocatable. +// +// Arguments +// addr - absolute direct code address +// +// Returns +// true if direct code addr needs a relocation recorded with VM +// +bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr) +{ + // If generating relocatable ngen code, then all code addr should go through relocation + if (compiler->opts.compReloc) + { + return true; + } + + // Else jitting. + +#ifdef _TARGET_AMD64_ + // By default all direct code addresses go through relocation so that VM will setup + // a jump stub if addr cannot be encoded as pc-relative offset. + return true; +#else //_TARGET_X86_ + // On x86 there is no need for recording relocations during jitting, + // because all addrs fit within 32-bits. 
+ return false; +#endif //_TARGET_X86_ +} +#endif //_TARGET_XARCH_ /***************************************************************************** @@ -3767,11 +3905,11 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, int slots = 0; #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; if (varDsc->TypeGet() == TYP_STRUCT) { CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); assert(typeHnd != nullptr); + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); if (!structDesc.passedInRegisters) { @@ -3834,6 +3972,31 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, regArgTab[regArgNum].slot = 1; slots = 1; + +#if FEATURE_MULTIREG_STRUCT_ARGS +#ifdef _TARGET_ARM64_ + if (varDsc->TypeGet() == TYP_STRUCT) + { + if (varDsc->lvExactSize > REGSIZE_BYTES) + { + assert(varDsc->lvExactSize <= 2*REGSIZE_BYTES); + + // Note that regArgNum+1 represents an argument index not an actual argument register. + // see genMapRegArgNumToRegNum(unsigned argNum, var_types type) + + // This is the setup for the second half of a MULTIREG struct arg + noway_assert(regArgNum+1 < regState->rsCalleeRegArgNum); + // we better not have added it already (there better not be multiple vars representing this argument register) + noway_assert(regArgTab[regArgNum+1].slot == 0); + + regArgTab[regArgNum+1].varNum = varNum; + regArgTab[regArgNum+1].slot = 2; + + slots++; + } + } +#endif // _TARGET_ARM64_ +#endif // FEATURE_MULTIREG_STRUCT_ARGS } #ifdef _TARGET_ARM_ @@ -4013,13 +4176,13 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, { destRegNum = varDsc->lvRegNum; } -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_STRUCT_ARGS && !defined(_TARGET_ARM_) else { assert(regArgTab[argNum].slot == 2); destRegNum = varDsc->lvOtherReg; } -#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#else // FEATURE_MULTIREG_STRUCT_ARGS && !defined(_TARGET_ARM_) else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG) { @@ -4031,7 +4194,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, assert(varDsc->TypeGet() == TYP_DOUBLE); destRegNum = REG_NEXT(varDsc->lvRegNum); } -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // FEATURE_MULTIREG_STRUCT_ARGS && !defined(_TARGET_ARM_) if (genRegMask(destRegNum) & regArgMaskLive) { /* we are trashing a live argument register - record it */ @@ -4149,7 +4312,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, var_types storeType = TYP_UNDEF; #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) - if (varDsc->TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(varDsc)) { size = EA_SIZE(varDsc->lvSize()); #if defined(_TARGET_AMD64_) @@ -4164,14 +4327,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) // Must be <= 16 bytes or else it wouldn't be passed in registers - noway_assert(EA_SIZE_IN_BYTES(size) <= 16); - // For now, if the struct is between (8, 16] bytes let's NYI since we would - // need to allocate a register pair for this. 
- if (size > 8) - { - NYI_ARM64("Struct parameter of size between 9 and 16 bytes needs a register pair"); - } - storeType = TYP_I_IMPL; + noway_assert(EA_SIZE_IN_BYTES(size) <= MAX_PASS_MULTIREG_BYTES); + + storeType = TYP_I_IMPL; + size = emitActualTypeSize(storeType); #endif // _TARGET_ARM64_ } else @@ -4205,16 +4364,17 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, } else { + // Since slot is typically 1, baseOffset is typically 0 + int baseOffset = (regArgTab[argNum].slot - 1) * TARGET_POINTER_SIZE; + getEmitter()->emitIns_S_R(ins_Store(storeType), - size, - srcRegNum, - varNum, - (regArgTab[argNum].slot-1) * TARGET_POINTER_SIZE); + size, + srcRegNum, + varNum, + baseOffset); -#ifdef DEBUGGING_SUPPORT - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0) && regArgTab[argNum].slot == 1) + if (regArgTab[argNum].slot == 1) psiMoveToStack(varNum); -#endif } /* mark the argument as processed */ @@ -4360,10 +4520,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg); regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg); -#ifdef DEBUGGING_SUPPORT psiMoveToReg(varNumSrc ); psiMoveToReg(varNumDest); -#endif } else #endif // _TARGET_XARCH_ @@ -4425,9 +4583,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, *pXtraRegClobbered = true; -#ifdef DEBUGGING_SUPPORT psiMoveToReg(varNumDest, xtraReg); -#endif /* start moving everything to its right place */ @@ -4496,9 +4652,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, regTracker.rsTrackRegCopy(destRegNum, xtraReg); -#ifdef DEBUGGING_SUPPORT psiMoveToReg(varNumSrc); -#endif /* mark the beginning register as processed */ @@ -4654,9 +4808,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum); -#ifdef DEBUGGING_SUPPORT psiMoveToReg(varNum); -#endif } /* mark the argument as processed */ @@ -4807,11 +4959,7 @@ void CodeGen::genEnregisterIncomingStackArgs() regTracker.rsTrackRegTrash(regNum); } - -#ifdef DEBUGGING_SUPPORT psiMoveToReg(varNum); -#endif - } } @@ -4931,7 +5079,15 @@ void CodeGen::genCheckUseBlockInit() /* With compInitMem, all untracked vars will have to be init'ed */ /* VSW 102460 - Do not force initialization of compiler generated temps, unless they are untracked GC type or structs that contain GC pointers */ - if ((!varDsc->lvTracked || varDsc->lvType == TYP_STRUCT) && varDsc->lvOnFrame && +#if FEATURE_SIMD + // TODO-1stClassStructs + // This is here to duplicate previous behavior, where TYP_SIMD8 locals + // were not being re-typed correctly. + if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) && +#else // !FEATURE_SIMD + if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) && +#endif // !FEATURE_SIMD + varDsc->lvOnFrame && (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0))) { varDsc->lvMustInit = true; @@ -5191,14 +5347,15 @@ void CodeGen::genPushCalleeSavedRegisters() assert((maskSaveRegsInt & RBM_FP) != 0); assert((maskSaveRegsInt & RBM_LR) != 0); - if ((compiler->lvaOutgoingArgSpaceSize == 0) && (-totalFrameSize >= -512)) + if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512)) { // Case #1. // // Generate: // stp fp,lr,[sp,#-framesz]! // - // The (-totalFrameSize >= -512) condition ensures that the predecrement of SP can occur with STP. 
+ // The (totalFrameSize < 512) condition ensures that both the predecrement + // and the postincrement of SP can occur with STP. // // After saving callee-saved registers, we establish the frame pointer with: // mov fp,sp @@ -5423,10 +5580,9 @@ void CodeGen::genPushCalleeSavedRegisters() inst_RV(INS_push, reg, TYP_REF); compiler->unwindPush(reg); -#ifdef DEBUGGING_SUPPORT - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0) && !doubleAlignOrFramePointerUsed()) + if (!doubleAlignOrFramePointerUsed()) psiAdjustStackLevel(REGSIZE_BYTES); -#endif + rsPushRegs &= ~regBit; } } @@ -5457,14 +5613,14 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); #endif // _TARGET_ARM_ -#ifdef _TARGET_X86_ +#ifdef _TARGET_XARCH_ if (frameSize == REGSIZE_BYTES) { - // Frame size is 4 + // Frame size is the same as register size. inst_RV(INS_push, REG_EAX, TYP_I_IMPL); } else -#endif // _TARGET_X86_ +#endif // _TARGET_XARCH_ if (frameSize < CORINFO_PAGE_SIZE) { #ifndef _TARGET_ARM64_ @@ -5656,10 +5812,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, #ifndef _TARGET_ARM64_ compiler->unwindAllocStack(frameSize); -#ifdef DEBUGGING_SUPPORT - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0) && !doubleAlignOrFramePointerUsed()) + if (!doubleAlignOrFramePointerUsed()) psiAdjustStackLevel(frameSize); -#endif #endif // !_TARGET_ARM64_ } @@ -6043,7 +6197,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool if (isFramePointerUsed()) { - if ((compiler->lvaOutgoingArgSpaceSize == 0) && (-totalFrameSize >= -512)) + if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512)) { frameType = 1; @@ -6579,7 +6733,9 @@ void CodeGen::genZeroInitFrame(int untrLclHi, } #endif // !_TARGET_64BIT_ - if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem) + if ((varDsc->TypeGet() == TYP_STRUCT) && + !compiler->info.compInitMem && + (varDsc->lvExactSize >= TARGET_POINTER_SIZE)) { // We only initialize the GC variables in the TYP_STRUCT const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES; @@ -8391,23 +8547,14 @@ void CodeGen::genFnProlog() { inst_RV (INS_push, REG_FPBASE, TYP_REF); compiler->unwindPush(REG_FPBASE); - -#ifdef DEBUGGING_SUPPORT - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) - psiAdjustStackLevel(sizeof(int)); -#endif + psiAdjustStackLevel(REGSIZE_BYTES); #ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp" inst_RV_RV(INS_mov, REG_FPBASE, REG_SPBASE); compiler->unwindSetFrameReg(REG_FPBASE, 0); - -#ifdef DEBUGGING_SUPPORT - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) - psiMoveESPtoEBP(); -#endif + psiMoveESPtoEBP(); #if DOUBLE_ALIGN - if (compiler->genDoubleAlign()) { noway_assert(isFramePointerUsed() == false); @@ -8415,7 +8562,7 @@ void CodeGen::genFnProlog() inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE); } -#endif +#endif // DOUBLE_ALIGN #endif // !_TARGET_AMD64_ } #endif // _TARGET_XARCH_ @@ -10098,7 +10245,7 @@ void CodeGen::genSetPSPSym(regNumber initReg, { assert(compiler->compGeneratingProlog); - if (!compiler->ehNeedsPSPSym()) + if (!compiler->ehNeedsPSPSym()) return; noway_assert(isFramePointerUsed()); // We need an explicit frame pointer @@ -10154,7 +10301,7 @@ void CodeGen::genSetPSPSym(regNumber initReg, regNumber regTmp = initReg; *pInitRegZeroed = false; - getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, 
regTmp, REG_SPBASE, SPtoCallerSPdelta); + getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); #elif defined(_TARGET_AMD64_) @@ -10601,7 +10748,7 @@ void CodeGen::genPInvokeCallEpilog(LclVarDsc * frameListRoot, 0); #elif defined(_TARGET_AMD64_) - if (IMAGE_REL_BASED_REL32 != compiler->info.compCompHnd->getRelocTypeHint(addrOfCaptureThreadGlobal)) + if (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint(addrOfCaptureThreadGlobal)) { instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal); @@ -10644,7 +10791,7 @@ void CodeGen::genPInvokeCallEpilog(LclVarDsc * frameListRoot, #else // !_TARGET_ARM_ #ifdef _TARGET_AMD64_ - if (IMAGE_REL_BASED_REL32 != compiler->info.compCompHnd->getRelocTypeHint(pAddrOfCaptureThreadGlobal)) + if (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint(pAddrOfCaptureThreadGlobal)) { instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_ECX, (ssize_t)pAddrOfCaptureThreadGlobal); getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ECX, REG_ECX, 0); @@ -10957,9 +11104,9 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) //------------------------------------------------------------------------ -// ARM-specific methods used by both the classic and RyuJIT +// Methods used to support FEATURE_MULTIREG_STRUCTS and HFA support for ARM32 //------------------------------------------------------------------------ -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_STRUCTS CORINFO_CLASS_HANDLE Compiler::GetStructClassHandle(GenTreePtr tree) { if (tree->TypeGet() == TYP_STRUCT) @@ -10971,16 +11118,11 @@ CORINFO_CLASS_HANDLE Compiler::GetStructClassHandle(GenTreePtr tree) switch (tree->OperGet()) { + default: + break; case GT_CALL: return tree->gtCall.gtRetClsHnd; - case GT_RET_EXPR: - return tree->gtRetExpr.gtRetClsHnd; - - case GT_RETURN: - assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR); - return GetStructClassHandle(tree->gtOp.gtOp1); - case GT_LDOBJ: return tree->gtLdObj.gtClass; @@ -10994,13 +11136,20 @@ CORINFO_CLASS_HANDLE Compiler::GetStructClassHandle(GenTreePtr tree) case GT_ASG: assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD); return GetStructClassHandle(tree->gtOp.gtOp1); - default: - return NO_CLASS_HANDLE; + +#if FEATURE_MULTIREG_STRUCT_RET + case GT_RET_EXPR: + return tree->gtRetExpr.gtRetClsHnd; + + case GT_RETURN: + assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR); + return GetStructClassHandle(tree->gtOp.gtOp1); +#endif } } return NO_CLASS_HANDLE; } -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // FEATURE_MULTIREG_STRUCTS #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING bool Compiler::IsRegisterPassable(CORINFO_CLASS_HANDLE hClass) @@ -11282,6 +11431,7 @@ void CodeGen::genSetScopeInfo() #endif // !_TARGET_64BIT_ #ifdef FEATURE_SIMD + case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: @@ -11313,6 +11463,7 @@ void CodeGen::genSetScopeInfo() case TYP_STRUCT: case TYP_BLK: //Needed because of the TYP_BLK stress mode #ifdef FEATURE_SIMD + case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h index 672c51706d..077f4e57b0 100644 --- a/src/jit/codegeninterface.h +++ b/src/jit/codegeninterface.h @@ -178,7 +178,15 @@ public: regNumber genGetThisArgReg (GenTreePtr call); #ifdef 
_TARGET_XARCH_ - bool genAddrShouldUsePCRel(size_t addr); +#ifdef _TARGET_AMD64_ + // There are no reloc hints on x86 + unsigned short genAddrRelocTypeHint(size_t addr); +#endif + bool genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr); + bool genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr); + bool genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr); + bool genCodeIndirAddrNeedsReloc(size_t addr); + bool genCodeAddrNeedsReloc(size_t addr); #endif @@ -284,8 +292,9 @@ public: unsigned * alignmentWB); void genMarkTreeInReg (GenTreePtr tree, regNumber reg); +#if CPU_LONG_USES_REGPAIR void genMarkTreeInRegPair (GenTreePtr tree, regPairNo regPair); - +#endif // Methods to abstract target information bool validImmForInstr (instruction ins, ssize_t val, insFlags flags = INS_FLAGS_DONT_CARE); diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index 0914f7d7d6..a0e1f96a30 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -111,7 +111,7 @@ void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, #endif noway_assert((regSet.rsMaskVars & regBit) != 0); - regSet.rsMaskVars &= ~regBit; + regSet.RemoveMaskVars(regBit); // Remove GC tracking if any for this register @@ -2448,7 +2448,7 @@ regMaskTP CodeGen::genMakeAddressable(GenTreePtr tree, // Relocs can be left alone if they are RIP-relative. if ((genTypeSize(tree->TypeGet()) > 4) && (!tree->IsIntCnsFitsInI32() || (tree->IsIconHandle() && - (IMAGE_REL_BASED_REL32 != compiler->info.compCompHnd->getRelocTypeHint((void*)tree->gtIntCon.gtIconVal))))) + (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal))))) { break; } @@ -5804,7 +5804,7 @@ void CodeGen::genCodeForQmark(GenTreePtr tree, // So, pretend there aren't any, and spill them anyway. This will only occur // if rsAdditional is non-empty. regMaskTP rsTemp = regSet.rsMaskVars; - regSet.rsMaskVars = RBM_NONE; + regSet.ClearMaskVars(); regSet.rsSpillRegs(rsSpill); @@ -9237,12 +9237,9 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, genCodeForTree_DONE(tree, reg); return; + case GT_INTRINSIC: -#if INLINE_MATH - - case GT_MATH: - - switch (tree->gtMath.gtMathFN) + switch (tree->gtIntrinsic.gtIntrinsicId) { case CORINFO_INTRINSIC_Round: { @@ -9279,8 +9276,6 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, genCodeForTree_DONE(tree, reg); return; -#endif // INLINE_MATH - case GT_LCLHEAP: reg = genLclHeap(op1); @@ -10853,50 +10848,7 @@ UPPER_BITS_ZERO: break; case TYP_DOUBLE: - // if SSE2 is not enabled this can only be a DblWasInt case - if (!compiler->opts.compCanUseSSE2) - { - /* Using a call (to a helper-function) for this cast will cause - all FP variable which are live across the call to not be - enregistered. 
Since we know that compiler->gtDblWasInt() varaiables - will not overflow when cast to TYP_INT, we just use a - memory spill and load to do the cast and avoid the call */ - - assert(compiler->gtDblWasInt(op1)); - - /* Load the FP value onto the coprocessor stack */ - - genCodeForTreeFlt(op1); - - /* Allocate a temp for the result */ - - TempDsc * temp; - temp = compiler->tmpGetTemp(TYP_INT); - - /* Store the FP value into the temp */ - - inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0); - genFPstkLevel--; - - /* Pick a register for the value */ - - reg = regSet.rsPickReg(needReg); - - /* Load the converted value into the registers */ - - inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT, EA_4BYTE); - - /* The value in the register is now trashed */ - - regTracker.rsTrackRegTrash(reg); - - /* We no longer need the temp */ - - compiler->tmpRlsTemp(temp); - - genCodeForTree_DONE(tree, reg); - } - else + if (compiler->opts.compCanUseSSE2) { // do the SSE2 based cast inline // getting the fp operand @@ -12669,7 +12621,7 @@ void CodeGen::genCodeForBBlist() specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE|RBM_FPBASE : RBM_SPBASE; - regSet.rsMaskVars = 0; + regSet.ClearMaskVars(); VarSetOps::ClearD(compiler, compiler->compCurLife); VarSetOps::Assign(compiler, liveSet, block->bbLiveIn); @@ -12707,7 +12659,7 @@ void CodeGen::genCodeForBBlist() regNumber regNum = varDsc->lvRegNum; regMaskTP regMask = genRegMask(regNum); - regSet.rsMaskVars |= regMask; + regSet.AddMaskVars(regMask); if (varDsc->lvType == TYP_REF) gcrefRegs |= regMask; @@ -18110,7 +18062,7 @@ void CodeGen::SetupLateArgs(GenTreePtr call) regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH; regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp; regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp; - regSet.rsMaskVars -= rsTemp; + regSet.RemoveMaskVars(rsTemp); regNumber regNum2 = regNum; for (unsigned i = 0; i < curArgTabEntry->numRegs; i++) @@ -18136,7 +18088,7 @@ void CodeGen::SetupLateArgs(GenTreePtr call) gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef; // Set maskvars back to normal - regSet.rsMaskVars |= rsTemp; + regSet.AddMaskVars(rsTemp); } /* Evaluate the argument to a register */ @@ -20277,7 +20229,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, // We keep regSet.rsMaskVars current during codegen, so we have to remove any // that have been copied into arg regs. 
- regSet.rsMaskVars &= ~(curArgMask); + regSet.RemoveMaskVars(curArgMask); gcInfo.gcRegGCrefSetCur &= ~(curArgMask); gcInfo.gcRegByrefSetCur &= ~(curArgMask); } diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index a548e1727c..7b79111d2f 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -37,14 +37,26 @@ void genCkfinite(GenTreePtr treeNode); - void genMathIntrinsic(GenTreePtr treeNode); + void genIntrinsic(GenTreePtr treeNode); void genPutArgStk(GenTreePtr treeNode); unsigned getBaseVarForPutArgStk(GenTreePtr treeNode); + void genCompareFloat(GenTreePtr treeNode); + + void genCompareInt(GenTreePtr treeNode); + +#if !defined(_TARGET_64BIT_) + void genCompareLong(GenTreePtr treeNode); +#endif + +#ifdef _TARGET_ARM64_ + void genCodeForLdObj(GenTreeOp* treeNode); +#endif + #ifdef FEATURE_SIMD instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned *ival = nullptr); - void genSIMDScalarMove(var_types type, regNumber target, regNumber src, bool zeroInit); + void genSIMDScalarMove(var_types type, regNumber target, regNumber src, bool zeroInit); void genSIMDIntrinsicInit(GenTreeSIMD* simdNode); void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode); void genSIMDIntrinsicInitArray(GenTreeSIMD* simdNode); @@ -132,8 +144,7 @@ void genCodeForCpBlkUnroll (GenTreeCpBlk* cpBlkNode); #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - void genPutStructArgStk(GenTreePtr treeNode - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned baseVarNum)); + void genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum); void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum); void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum); @@ -159,6 +170,10 @@ instruction genGetInsForOper (genTreeOps oper, var_types type); + void genStoreInd(GenTreePtr node); + + bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data); + void genCallInstruction(GenTreePtr call); void genJmpMethod(GenTreePtr jmp); diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 740c6e742b..74c8ac0f52 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -71,7 +71,7 @@ void CodeGen::genSpillVar(GenTreePtr tree) tree->gtFlags &= ~GTF_REG_VAL; instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum)); - +#if CPU_LONG_USES_REGPAIR if (varTypeIsMultiReg(tree)) { assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair)); @@ -82,6 +82,7 @@ void CodeGen::genSpillVar(GenTreePtr tree) inst_TT_RV(storeIns, tree, regHi, 4); } else +#endif { assert(varDsc->lvRegNum == tree->gtRegNum); inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size); @@ -388,7 +389,7 @@ void CodeGen::genCodeForBBlist() // Figure out which registers hold variables on entry to this block - regSet.rsMaskVars = RBM_NONE; + regSet.ClearMaskVars(); gcInfo.gcRegGCrefSetCur = RBM_NONE; gcInfo.gcRegByrefSetCur = RBM_NONE; @@ -445,23 +446,11 @@ void CodeGen::genCodeForBBlist() } } + regSet.rsMaskVars = newLiveRegSet; + #ifdef DEBUG if (compiler->verbose) { - printf("\t\t\t\t\t\t\tLive regs: "); - if (regSet.rsMaskVars == newLiveRegSet) - { - printf("(unchanged) "); - } - else - { - printRegMaskInt(regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); - printf(" => "); - } - printRegMaskInt(newLiveRegSet); - compiler->getEmitter()->emitDispRegSet(newLiveRegSet); - printf("\n"); if (!VarSetOps::IsEmpty(compiler, addedGCVars)) { 
printf("\t\t\t\t\t\t\tAdded GCVars: "); @@ -477,7 +466,6 @@ void CodeGen::genCodeForBBlist() } #endif // DEBUG - regSet.rsMaskVars = newLiveRegSet; gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUG_ARG(true)); gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUG_ARG(true)); @@ -952,8 +940,7 @@ void CodeGen::genCodeForBBlist() // Generate a RIP-relative // lea reg, [rip + disp32] ; the RIP is implicit // which will be position-indepenent. - // TODO-XArch-Bug?: For ngen, we need to generate a reloc for the displacement (maybe EA_PTR_DSP_RELOC). - getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); + getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET); __fallthrough; case BBJ_EHFINALLYRET: @@ -1033,7 +1020,7 @@ sameRegAsDst(GenTree *tree, GenTree *&other /*out*/) } } -// move an immediate value into an integer register +// Move an immediate value into an integer register void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, @@ -1054,7 +1041,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, } else { - if (genAddrShouldUsePCRel(imm)) + if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm)) { getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm); } @@ -1084,8 +1071,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types tar GenTreeIntConCommon* con = tree->AsIntConCommon(); ssize_t cnsVal = con->IconValue(); - bool needReloc = compiler->opts.compReloc && tree->IsIconHandle(); - if (needReloc) + if (con->ImmedValNeedsReloc(compiler)) { instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); regTracker.rsTrackRegTrash(targetReg); @@ -1280,8 +1266,19 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) genProduceReg(treeNode); } -// Generate code for ADD, SUB, AND XOR, and OR. -// mul and div variants have special constraints on x64 so are not handled here. +//------------------------------------------------------------------------ +// genCodeForBinary: Generate code for many binary arithmetic operators +// +// Arguments: +// treeNode - The binary operation for which we are generating code. +// +// Return Value: +// None. +// +// Notes: +// Mul and div variants have special constraints on x64 so are not handled here. +// See teh assert below for the operators that are handled. 
+ void CodeGen::genCodeForBinary(GenTree* treeNode) { const genTreeOps oper = treeNode->OperGet(); @@ -1289,14 +1286,38 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) var_types targetType = treeNode->TypeGet(); emitter *emit = getEmitter(); +#if defined(_TARGET_64BIT_) + assert (oper == GT_OR || + oper == GT_XOR || + oper == GT_AND || + oper == GT_ADD || + oper == GT_SUB); +#else // !defined(_TARGET_64BIT_) assert (oper == GT_OR || oper == GT_XOR || oper == GT_AND || + oper == GT_ADD_HI || + oper == GT_SUB_HI || + oper == GT_MUL_HI || + oper == GT_DIV_HI || + oper == GT_MOD_HI || oper == GT_ADD || oper == GT_SUB); +#endif // !defined(_TARGET_64BIT_) GenTreePtr op1 = treeNode->gtGetOp1(); GenTreePtr op2 = treeNode->gtGetOp2(); + + // Commutative operations can mark op1 as contained to generate "op reg, memop/immed" + if (op1->isContained()) + { + assert(treeNode->OperIsCommutative()); + assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32()); + + op1 = treeNode->gtGetOp2(); + op2 = treeNode->gtGetOp1(); + } + instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); // The arithmetic node must be sitting in a register (since it's not contained) @@ -1473,6 +1494,11 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_CNS_INT: +#ifdef _TARGET_X86_ + NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants"); +#endif // _TARGET_X86_ + __fallthrough; + case GT_CNS_DBL: genSetRegToConst(targetReg, targetType, treeNode); genProduceReg(treeNode); @@ -1508,6 +1534,10 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) assert(varTypeIsIntegralOrI(treeNode)); __fallthrough; +#if !defined(_TARGET_64BIT_) + case GT_ADD_HI: + case GT_SUB_HI: +#endif // !defined(_TARGET_64BIT_) case GT_ADD: case GT_SUB: genCodeForBinary(treeNode); @@ -2181,8 +2211,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genCodeForDivMod(treeNode->AsOp()); break; - case GT_MATH: - genMathIntrinsic(treeNode); + case GT_INTRINSIC: + genIntrinsic(treeNode); break; #ifdef FEATURE_SIMD @@ -2206,226 +2236,23 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register // (signed < or >= where targetReg != REG_NA) - GenTreeOp *tree = treeNode->AsOp(); - GenTreePtr op1 = tree->gtOp1; - GenTreePtr op2 = tree->gtOp2; + GenTreePtr op1 = treeNode->gtGetOp1(); var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); - -#if !defined(_TARGET_64BIT_) - NYI_IF(varTypeIsLong(op1Type) || varTypeIsLong(op2Type), "Comparison of longs"); -#endif // !defined(_TARGET_64BIT_) - - genConsumeOperands(tree); - instruction ins; - emitAttr cmpAttr; if (varTypeIsFloating(op1Type)) { - // SSE2 instruction ucomis[s|d] is performs unordered comparison and - // updates rFLAGS register as follows. - // Result of compare ZF PF CF - // ----------------- ------------ - // Unordered 1 1 1 <-- this result implies one of operands of compare is a NAN. - // Greater 0 0 0 - // Less Than 0 0 1 - // Equal 1 0 0 - // - // From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform - // unordered comparison of floating point values. That is *.UN comparisons result in true when - // one of the operands is a NaN whereas ordered comparisons results in false. - // - // Opcode Amd64 equivalent Comment - // ------ ----------------- -------- - // BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above table. 
- // jb - // - // BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b - // ja - // - // BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered - // jb - // - // BGT(a, b) ucomis[s|d] a, b branch if a>b - // ja - // - // BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered - // jbe - // - // BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which mean b>=a or a<=b - // jae - // - // BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered - // jbe - // - // BGE(a,b) ucomis[s|d] a, b branch if a>=b - // jae - // - // BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec. - // je This case is given for completeness, in case if JIT generates such - // a gentree internally. - // - // BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b. - // jpe L1 - // je <true label> - // L1: - // - // BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is - // jne given for completeness, in case if JIT generates such a gentree - // internally. - // - // BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b - // jpe <true label> - // jne <true label> - // - // As we can see from the above equalities that the operands of a compare operator need to be - // reveresed in case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN. - - bool reverseOps; - if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0) - { - // Unordered comparison case - reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE); - } - else - { - reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE); - } - - if (reverseOps) - { - GenTreePtr tmp = op1; - op1 = op2; - op2 = tmp; - } - - ins = ins_FloatCompare(op1Type); - cmpAttr = emitTypeSize(op1Type); + genCompareFloat(treeNode); } - else // not varTypeIsFloating(op1Type) +#if !defined(_TARGET_64BIT_) + // X86 Long comparison + else if (varTypeIsLong(op1Type)) { - assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm - assert(!varTypeIsFloating(op2Type)); - - // By default we use an int32 sized cmp instruction - // - ins = INS_cmp; - var_types cmpType = TYP_INT; - - // In the if/then/else statement below we may change the - // 'cmpType' and/or 'ins' to generate a smaller instruction - - // Are we comparing two values that are the same size? - // - if (genTypeSize(op1Type) == genTypeSize(op2Type)) - { - if (op1Type == op2Type) - { - // If both types are exactly the same we can use that type - cmpType = op1Type; - } - else if (genTypeSize(op1Type) == 8) - { - // If we have two different int64 types we need to use a long compare - cmpType = TYP_LONG; - } - - cmpAttr = emitTypeSize(cmpType); - } - else // Here we know that (op1Type != op2Type) - { - // Do we have a short compare against a constant in op2? - // - // We checked for this case in LowerCmp() and if we can perform a small - // compare immediate we labeled this compare with a GTF_RELOP_SMALL - // and for unsigned small non-equality compares the GTF_UNSIGNED flag. 
- // - if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0)) - { - assert(varTypeIsSmall(op1Type)); - cmpType = op1Type; - } - else // compare two different sized operands - { - // For this case we don't want any memory operands, only registers or immediates - // - assert(!op1->isContainedMemoryOp()); - assert(!op2->isContainedMemoryOp()); - - // Check for the case where one operand is an int64 type - // Lower should have placed 32-bit operand in a register - // for signed comparisons we will sign extend the 32-bit value in place. - // - bool op1Is64Bit = (genTypeSize(op1Type) == 8); - bool op2Is64Bit = (genTypeSize(op2Type) == 8); - if (op1Is64Bit) - { - cmpType = TYP_LONG; - if (!(treeNode->gtFlags & GTF_UNSIGNED) && !op2Is64Bit) - { - assert(op2->gtRegNum != REG_NA); -#ifdef _TARGET_X86_ - NYI_X86("64 bit sign extensions for x86/RyuJIT"); -#else // !_TARGET_X86_ - inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type); -#endif // !_TARGET_X86_ - } - } - else if (op2Is64Bit) - { - cmpType = TYP_LONG; - if (!(treeNode->gtFlags & GTF_UNSIGNED) && !op1Is64Bit) - { - assert(op1->gtRegNum != REG_NA); -#ifdef _TARGET_X86_ - NYI_X86("64 bit sign extensions for x86/RyuJIT"); -#else // !_TARGET_X86_ - inst_RV_RV(INS_movsxd, op1->gtRegNum, op1->gtRegNum, op1Type); -#endif // !_TARGET_X86_ - } - } - } - - cmpAttr = emitTypeSize(cmpType); - } - - // See if we can generate a "test" instruction instead of a "cmp". - // For this to generate the correct conditional branch we must have - // a compare against zero. - // - if (op2->IsZero()) - { - if (op1->isContained()) - { - // op1 can be a contained memory op - // or the special contained GT_AND that we created in Lowering::LowerCmp() - // - if ((op1->OperGet() == GT_AND)) - { - noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed()); - - ins = INS_test; // we will generate "test andOp1, andOp2CnsVal" - op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1 - op1 = op1->gtOp.gtOp1; // overwrite op1 - // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2); - } - } - else // op1 is not contained thus it must be in a register - { - ins = INS_test; - op2 = op1; // we will generate "test reg1,reg1" - // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2); - } - } + genCompareLong(treeNode); } - - emit->emitInsBinary(ins, cmpAttr, op1, op2); - - // Are we evaluating this into a register? - if (targetReg != REG_NA) +#endif // !defined(_TARGET_64BIT_) + else { - genSetRegToCond(targetReg, tree); - genProduceReg(tree); + genCompareInt(treeNode); } } break; @@ -2500,113 +2327,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_STOREIND: - { -#ifdef FEATURE_SIMD - // Storing Vector3 of size 12 bytes through indirection - if (treeNode->TypeGet() == TYP_SIMD12) - { - genStoreIndTypeSIMD12(treeNode); - break; - } -#endif //FEATURE_SIMD - - GenTree* data = treeNode->gtOp.gtOp2; - GenTree* addr = treeNode->gtOp.gtOp1; - assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet())); - GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data); - if (writeBarrierForm != GCInfo::WBF_NoBarrier) - { - // data and addr must be in registers. - // Consume both registers so that any copies of interfering registers are taken care of. - genConsumeOperands(treeNode->AsOp()); - - // At this point, we should not have any interference. - // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go. 
- noway_assert(data->gtRegNum != REG_ARG_0); - - // addr goes in REG_ARG_0 - if (addr->gtRegNum != REG_ARG_0) - { - inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); - } - - // data goes in REG_ARG_1 - if (data->gtRegNum != REG_ARG_1) - { - inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); - } - - genGCWriteBarrier(treeNode, writeBarrierForm); - } - else - { - bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0); - bool dataIsUnary = false; - GenTree* nonRMWsrc = nullptr; - - // We must consume the operands in the proper execution order, so that liveness is - // updated appropriately. - if (!reverseOps) - { - genConsumeAddress(addr); - } - - if (data->isContained() && !data->OperIsLeaf()) - { - dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0); - if (!dataIsUnary) - { - nonRMWsrc = data->gtGetOp1(); - if (nonRMWsrc->isIndir() && Lowering::IndirsAreEquivalent(nonRMWsrc, treeNode)) - { - nonRMWsrc = data->gtGetOp2(); - } - genConsumeRegs(nonRMWsrc); - } - } - else - { - genConsumeRegs(data); - } - - if (reverseOps) - { - genConsumeAddress(addr); - } - - if (data->isContained() && !data->OperIsLeaf()) - { - if (dataIsUnary) - { - emit->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), - emitTypeSize(treeNode), - treeNode); - } - else - { - if (data->OperGet() == GT_LSH || - data->OperGet() == GT_RSH || - data->OperGet() == GT_RSZ || - data->OperGet() == GT_ROL || - data->OperGet() == GT_ROR) - { - genCodeForShift(addr, data->gtOp.gtOp2, data); - } - else - { - emit->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), - emitTypeSize(treeNode), - treeNode, - nonRMWsrc); - } - } - } - else - { - emit->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(treeNode), treeNode); - } - } - } + genStoreInd(treeNode); break; case GT_COPY: @@ -2817,7 +2538,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_LABEL: genPendingCallLabel = genCreateTempLabel(); treeNode->gtLabel.gtLabBB = genPendingCallLabel; - emit->emitIns_R_L(INS_lea, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum); + emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum); break; case GT_COPYOBJ: @@ -2893,6 +2614,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_CLS_VAR_ADDR: getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0); + genProduceReg(treeNode); break; default: @@ -2937,7 +2659,13 @@ CodeGen::genStoreRegisterReturnInLclVar(GenTreePtr treeNode) compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); assert(structDesc.passedInRegisters); - assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + + // TODO-Amd64-Unix: Have Lubo Review this change + // Test case JIT.opt.ETW.TailCallCases.TailCallCases has eightByteCount == 1 + // This occurs with a TYP_STRUCT that is 3 bytes in size + // commenting out this assert results in correct codegen + // + // assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); GenTreePtr op1 = treeNode->gtOp.gtOp1; genConsumeRegs(op1); @@ -3480,11 +3208,23 @@ void CodeGen::genCodeForInitBlkRepStos(GenTreeInitBlk* initBlkNode) assert(!initVal->isContained()); assert(!blockSize->isContained()); +#ifdef _TARGET_AMD64_ assert(blockSize->gtSkipReloadOrCopy()->IsCnsIntOrI()); - size_t size = blockSize->gtIntCon.gtIconVal; - if (initVal->IsCnsIntOrI()) +#endif + +#ifdef _TARGET_X86_ + if (blockSize->gtSkipReloadOrCopy()->IsCnsIntOrI()) +#endif { - 
assert(size > INITBLK_UNROLL_LIMIT && size < INITBLK_STOS_LIMIT); + size_t size = blockSize->gtIntCon.gtIconVal; + if (initVal->IsCnsIntOrI()) + { +#ifdef _TARGET_AMD64_ + assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); +#else + assert(size > CPBLK_UNROLL_LIMIT); +#endif + } } #endif // DEBUG @@ -3633,7 +3373,9 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst if (baseNode->OperIsLocalAddr()) { if (baseNode->gtOper == GT_LCL_FLD_ADDR) + { offset += baseNode->gtLclFld.gtLclOffs; + } emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset); } else @@ -3652,7 +3394,6 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst // baseNode - the base, relative to which to store the src register. // offset - the offset that is added to the baseNode to calculate the address to store into. // - void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset) { emitter *emit = getEmitter(); @@ -3711,8 +3452,8 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeCpBlk* cpBlkNode) size_t slots = size / XMM_REGSIZE_BYTES; // TODO: In the below code the load and store instructions are for 16 bytes, but the - // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but - // this probably needs to be changed. + // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but + // this probably needs to be changed. while (slots-- > 0) { // Load @@ -3780,15 +3521,23 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode) assert(!srcAddr->isContained()); assert(!blockSize->isContained()); + +#ifdef _TARGET_AMD64_ assert(blockSize->IsCnsIntOrI()); - size_t size = blockSize->gtIntCon.gtIconVal; +#endif + +#ifdef _TARGET_X86_ + if (blockSize->IsCnsIntOrI()) +#endif + { + size_t size = blockSize->gtIntCon.gtIconVal; #ifdef _TARGET_X64_ - assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); + assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); #else - assert(size > CPBLK_UNROLL_LIMIT); + assert(size > CPBLK_UNROLL_LIMIT); #endif - + } #endif // DEBUG genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX); @@ -3808,7 +3557,7 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode) // Need refactoring of copyblk before it could be used for putarg_stk. // The difference for now is that a putarg_stk contains its children, while cpyblk does not. // This creates differences in code. After some significant refactoring it could be reused. - +// void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum) { noway_assert(putArgNode->TypeGet() == TYP_STRUCT); @@ -3932,7 +3681,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV // Preconditions: // The size argument of the PutArgStk (for structs) is a constant and is between // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes. 
- +// void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum) { assert(putArgNode->TypeGet() == TYP_STRUCT); @@ -4153,7 +3902,7 @@ CodeGen::genTableBasedSwitch(GenTree* treeNode) // add it to the absolute address of fgFirstBB compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; - getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); + getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg); getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg); // jmp baseReg getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg); @@ -4287,20 +4036,38 @@ CodeGen::genRangeCheck(GenTreePtr oper) if (arrIndex->isContainedIntOrIImmed()) { + // arrIndex is a contained constant. In this case + // we will generate one of the following + // cmp [mem], immed (if arrLen is a memory op) + // cmp reg, immed (if arrLen is in a reg) + // + // That is arrLen cannot be a contained immed. + assert(!arrLen->isContainedIntOrIImmed()); + src1 = arrLen; src2 = arrIndex; jmpKind = EJ_jbe; } else { + // arrIndex could either be a contained memory op or a reg + // In this case we will generate one of the following + // cmp [mem], immed (if arrLen is a constant) + // cmp [mem], reg (if arrLen is in a reg) + // cmp reg, immed (if arrIndex is in a reg) + // cmp reg1, reg2 (if arraIndex is in reg1) + // cmp reg, [mem] (if arrLen is a memory op) + // + // That is only one of arrIndex or arrLen can be a memory op. + assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp()); + src1 = arrIndex; src2 = arrLen; jmpKind = EJ_jae; } -#if DEBUG var_types bndsChkType = src2->TypeGet(); - +#if DEBUG // Bounds checks can only be 32 or 64 bit sized comparisons. assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG); @@ -4308,7 +4075,7 @@ CodeGen::genRangeCheck(GenTreePtr oper) assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet())); #endif //DEBUG - getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(src2->TypeGet()), src1, src2); + getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2); genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB); } @@ -4546,6 +4313,10 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) case GT_RSZ: ins = INS_shr; break; case GT_SUB: ins = INS_sub; break; case GT_XOR: ins = INS_xor; break; +#if !defined(_TARGET_64BIT_) + case GT_ADD_HI: ins = INS_adc; break; + case GT_SUB_HI: ins = INS_sbb; break; +#endif // !defined(_TARGET_64BIT_) default: unreached(); break; } @@ -4578,8 +4349,11 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, regNumber operandReg = REG_NA; regNumber indexReg = REG_NA; int offset = 0; + ssize_t disp = 0; emitAttr attr = EA_UNKNOWN; - bool isClsVarAddr = (operand->OperGet() == GT_CLS_VAR_ADDR);; + bool isClsVarAddr = (operand->OperGet() == GT_CLS_VAR_ADDR); + bool isLclVarAddr = (operand->OperGet() == GT_LCL_VAR_ADDR); + bool isCnsIntOrIAndFitsWithinAddrBase = false; if (!isRMW) { @@ -4589,6 +4363,7 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, else { targetType = parent->gtOp.gtOp1->TypeGet(); + attr = EA_ATTR(genTypeSize(targetType)); if (actualOperand->OperGet() == GT_LCL_VAR) { @@ -4607,17 +4382,39 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, assert(!shiftBy->isContainedIntOrIImmed()); } } + else if (actualOperand->IsCnsIntOrI()) + { + GenTreeIntConCommon* intCon = actualOperand->AsIntConCommon(); + if 
(actualOperand->isContained()) + { + // Contained absolute address should fit within addr base + assert(intCon->FitsInAddrBase(compiler)); + + // Don't expect to see GT_COPY or GT_RELOAD + assert(operand == actualOperand); + + isCnsIntOrIAndFitsWithinAddrBase = true; + disp = intCon->IconValue(); + + if (intCon->AddrNeedsReloc(compiler)) + { + attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); + } + } + else + { + operandReg = operand->gtRegNum; + } + } else { - // The only other supported operand for RMW is GT_CLS_VAR_ADDR - assert(actualOperand->OperGet() == GT_CLS_VAR_ADDR); + // The only other supported operands for RMW are GT_CLS_VAR_ADDR and GT_LCL_VAR_ADDR + assert(actualOperand->OperGet() == GT_CLS_VAR_ADDR || actualOperand->OperGet() == GT_LCL_VAR_ADDR); - // We don't expect to see GT_COPY or GT_RELOAD for GT_CLS_VAR_ADDR - // so 'actualOperand' should be the same as 'operand' + // We don't expect to see GT_COPY or GT_RELOAD for GT_CLS_VAR_ADDR and GT_LCL_VAR_ADDR + // so 'actualOperand' should be the same as 'operand' assert(operand == actualOperand); } - - attr = EA_ATTR(genTypeSize(targetType)); } if (shiftBy->isContainedIntOrIImmed()) @@ -4638,7 +4435,7 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, } else { - if (isClsVarAddr && shiftByValue == 1) + if ((isClsVarAddr || isLclVarAddr) && shiftByValue == 1) { switch (ins) { @@ -4661,10 +4458,18 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, // leave 'ins' unchanged break; } + + if (isClsVarAddr) + { getEmitter()->emitIns_C(ins, attr, operand->gtClsVar.gtClsVarHnd, 0); } else { + getEmitter()->emitIns_S(ins, attr, operand->gtLclVarCommon.gtLclNum, 0); + } + } + else + { switch (ins) { case INS_sar: @@ -4690,6 +4495,14 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, { getEmitter()->emitIns_C_I(ins, attr, operand->gtClsVar.gtClsVarHnd, 0, shiftByValue); } + else if (isLclVarAddr) + { + getEmitter()->emitIns_S(ins, attr, operand->gtLclVarCommon.gtLclNum, 0); + } + else if (isCnsIntOrIAndFitsWithinAddrBase) + { + getEmitter()->emitIns_I_AI(ins, attr, shiftByValue, disp); + } else { getEmitter()->emitIns_I_AR(ins, attr, shiftByValue, operandReg, offset); @@ -4722,9 +4535,17 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, { getEmitter()->emitIns_C_R(ins, attr, operand->gtClsVar.gtClsVarHnd, shiftReg, 0); } + else if (isLclVarAddr) + { + getEmitter()->emitIns_S_R(ins, attr, shiftReg, operand->gtLclVarCommon.gtLclNum, 0); + } + else if (isCnsIntOrIAndFitsWithinAddrBase) + { + getEmitter()->emitIns_AI_R(ins, attr, shiftReg, disp); + } else { - getEmitter()->emitIns_AR_R(ins, attr, indexReg, operandReg, offset); + getEmitter()->emitIns_AR_R(ins, attr, indexReg, operandReg, (int) offset); } } else @@ -4824,7 +4645,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree) } #endif // DEBUG - regSet.rsMaskVars |= genGetRegMask(varDsc); + regSet.AddMaskVars(genGetRegMask(varDsc)); } } else @@ -4876,12 +4697,14 @@ void CodeGen::genRegCopy(GenTree* treeNode) // register, in which case it is passed as an argument, or returned from a call, // in an integer register and must be copied if it's in an xmm register. 
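The new GT_ADD_HI/GT_SUB_HI cases above map to adc/sbb because, on a 32-bit target, a 64-bit add or subtract is decomposed into a low-half operation followed by a high-half operation that consumes the carry/borrow. A small illustration of the same decomposition in plain C++ (not JIT IR; Add64Via32 is just an illustrative name):

#include <cstdint>

// 64-bit add performed as two 32-bit operations, mirroring GT_ADD (add) + GT_ADD_HI (adc).
uint64_t Add64Via32(uint32_t aLo, uint32_t aHi, uint32_t bLo, uint32_t bHi)
{
    uint32_t lo    = aLo + bLo;            // low halves:  add  (sets the carry flag)
    uint32_t carry = (lo < aLo) ? 1u : 0u; // carry out of the low-half add
    uint32_t hi    = aHi + bHi + carry;    // high halves: adc  (add with carry)
    return ((uint64_t)hi << 32) | lo;
}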
- if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1)) + bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1)); + bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode)); + if (srcFltReg != tgtFltReg) { instruction ins; regNumber fpReg; regNumber intReg; - if(varTypeIsFloating(treeNode)) + if (tgtFltReg) { ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet()); fpReg = targetReg; @@ -5419,6 +5242,264 @@ void CodeGen::genEmitCall(int callType, indir->Offset()); } + +//------------------------------------------------------------------------ +// genStoreInd: Generate code for a GT_STOREIND node. +// +// Arguments: +// treeNode - The GT_STOREIND node for which to generate code. +// +// Return Value: +// none + +void CodeGen::genStoreInd(GenTreePtr node) +{ + assert(node->OperGet() == GT_STOREIND); + +#ifdef FEATURE_SIMD + // Storing Vector3 of size 12 bytes through indirection + if (node->TypeGet() == TYP_SIMD12) + { + genStoreIndTypeSIMD12(node); + return; + } +#endif //FEATURE_SIMD + + GenTreeStoreInd* storeInd = node->AsStoreInd(); + GenTree* data = storeInd->Data(); + GenTree* addr = storeInd->Addr(); + var_types targetType = node->TypeGet(); + + assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet())); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(node, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering registers are taken care of. + genConsumeOperands(node->AsOp()); + + if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data)) + return; + + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go. + noway_assert(data->gtRegNum != REG_ARG_0); + + // addr goes in REG_ARG_0 + if (addr->gtRegNum != REG_ARG_0) + { + inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); + } + + // data goes in REG_ARG_1 + if (data->gtRegNum != REG_ARG_1) + { + inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); + } + + genGCWriteBarrier(node, writeBarrierForm); + } + else + { + bool reverseOps = ((node->gtFlags & GTF_REVERSE_OPS) != 0); + bool dataIsUnary = false; + bool isRMWMemoryOp = storeInd->IsRMWMemoryOp(); + GenTree* rmwSrc = nullptr; + + // We must consume the operands in the proper execution order, so that liveness is + // updated appropriately. + if (!reverseOps) + { + genConsumeAddress(addr); + } + + // If storeInd represents a RMW memory op then its data is a non-leaf node marked as contained + // and non-indir operand of data is the source of RMW memory op. + if (isRMWMemoryOp) + { + assert(data->isContained() && !data->OperIsLeaf()); + + GenTreePtr rmwDst = nullptr; + + dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0); + if (!dataIsUnary) + { + if (storeInd->IsRMWDstOp1()) + { + rmwDst = data->gtGetOp1(); + rmwSrc = data->gtGetOp2(); + } + else + { + assert(storeInd->IsRMWDstOp2()); + rmwDst = data->gtGetOp2(); + rmwSrc = data->gtGetOp1(); + } + } + else + { + // For unary RMW ops, src and dst of RMW memory op is the same. 
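For context on the RMW (read-modify-write) memory ops being recognized here: a few source-level patterns that this store-indirect path folds into a single memory-operand instruction. The x86 forms in the comments are purely illustrative.

// Illustrative patterns for RMW store-indirect codegen:
void RmwExamples(int* p, int x)
{
    *p += x;    // binary RMW:  add dword ptr [p], <x reg>
    *p <<= 3;   // shift RMW:   shl dword ptr [p], 3
    *p = ~*p;   // unary RMW:   not dword ptr [p]   (source and destination are the same indirection)
}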
+ assert(storeInd->IsRMWDstOp1()); + rmwSrc = data->gtGetOp1(); + rmwDst = data->gtGetOp1(); + assert(rmwSrc->isContained()); + } + + assert(rmwSrc != nullptr); + assert(rmwDst != nullptr); + assert(Lowering::IndirsAreEquivalent(rmwDst, node)); + + genConsumeRegs(rmwSrc); + } + else + { + genConsumeRegs(data); + } + + if (reverseOps) + { + genConsumeAddress(addr); + } + + if (isRMWMemoryOp) + { + if (dataIsUnary) + { + // generate code for unary RMW memory ops like neg/not + getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(node), node); + } + else + { + if (data->OperGet() == GT_LSH || + data->OperGet() == GT_RSH || + data->OperGet() == GT_RSZ || + data->OperGet() == GT_ROL || + data->OperGet() == GT_ROR) + { + // generate code for shift RMW memory ops + genCodeForShift(addr, rmwSrc, data); + } + else + { + // generate code for remaining binary RMW memory ops like add/sub/and/or/xor + getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(node), node, rmwSrc); + } + } + } + else + { + getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(node), node); + } + } +} + + +//------------------------------------------------------------------------ +// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized +// helper functions. +// +// Arguments: +// writeBarrierForm - the write barrier form to use +// addr - the address at which to do the store +// data - the data to store +// +// Return Value: +// true if an optimized write barrier form was used, false if not. If this +// function returns false, the caller must emit a "standard" write barrier. + +bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data) +{ + assert(writeBarrierForm != GCInfo::WBF_NoBarrier); + +#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS + bool useOptimizedWriteBarriers = true; + +#ifdef DEBUG + useOptimizedWriteBarriers = (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method. 
+#endif + + if (!useOptimizedWriteBarriers) + { + return false; + } + + const static int regToHelper[2][8] = + { + // If the target is known to be in managed memory + { + CORINFO_HELP_ASSIGN_REF_EAX, + CORINFO_HELP_ASSIGN_REF_ECX, + -1, + CORINFO_HELP_ASSIGN_REF_EBX, + -1, + CORINFO_HELP_ASSIGN_REF_EBP, + CORINFO_HELP_ASSIGN_REF_ESI, + CORINFO_HELP_ASSIGN_REF_EDI, + }, + + // Don't know if the target is in managed memory + { + CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, + CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, + -1, + CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, + -1, + CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, + CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, + CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, + }, + }; + + noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX); + noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX); + noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX); + noway_assert(regToHelper[0][REG_ESP] == -1); + noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP); + noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI); + noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI); + + noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX); + noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX); + noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX); + noway_assert(regToHelper[1][REG_ESP] == -1); + noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP); + noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI); + noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI); + + regNumber reg = data->gtRegNum; + noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER)); + + // Generate the following code: + // lea edx, addr + // call write_barrier_helper_reg + + // addr goes in REG_ARG_0 + if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already by in this register? + { + inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet()); + } + + unsigned tgtAnywhere = 0; + if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked) + { + tgtAnywhere = 1; + } + + // We might want to call a modified version of genGCWriteBarrier() to get the benefit of + // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works + // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here. + + genEmitHelperCall(regToHelper[tgtAnywhere][reg], + 0, // argSize + EA_PTRSIZE); // retSize + + return true; +#else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS + return false; +#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS +} + // Produce code for a GT_CALL node void CodeGen::genCallInstruction(GenTreePtr node) { @@ -5608,6 +5689,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) retSize = EA_BYREF; } + bool fPossibleSyncHelperCall = false; + CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; + #ifdef DEBUGGING_SUPPORT // We need to propagate the IL offset information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. @@ -5627,7 +5711,7 @@ void CodeGen::genCallInstruction(GenTreePtr node) { // Note that if gtControlExpr is an indir of an absolute address, we mark it as // contained only if it can be encoded as PC-relative offset. 
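A note on the per-register write-barrier helpers tabulated above: the emitted sequence is just an address move plus a call to a helper specialized on the register that holds the value being stored. Roughly (illustrative layout, not actual emitter output):

//   mov  edx, <addr>                              ; the address goes in REG_WRITE_BARRIER (edx)
//   call <CORINFO_HELP_[CHECKED_]ASSIGN_REF_xxx>  ; helper chosen by the register holding the value,
//                                                 ; e.g. ..._ASSIGN_REF_EAX when the value is in eax
//
// The unchecked ASSIGN_REF_* helpers are used when the destination is known to be in the
// GC heap; the CHECKED_ forms are used when that is not known (the tgtAnywhere case above).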
- assert(genAddrShouldUsePCRel(target->AsIndir()->Base()->AsIntConCommon()->IconValue())); + assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler)); genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd, @@ -5694,7 +5778,7 @@ void CodeGen::genCallInstruction(GenTreePtr node) if (callType == CT_HELPER) { // Direct call to a helper method. - CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + helperNum = compiler->eeGetHelperNum(methHnd); noway_assert(helperNum != CORINFO_HELP_UNDEF); void *pAddr = nullptr; @@ -5704,6 +5788,12 @@ void CodeGen::genCallInstruction(GenTreePtr node) { addr = pAddr; } + + // tracking of region protected by the monitor in synchronized methods + if (compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + fPossibleSyncHelperCall = true; + } } else { @@ -5793,6 +5883,39 @@ void CodeGen::genCallInstruction(GenTreePtr node) { gcInfo.gcMarkRegSetNpt(RBM_INTRET); } + +#if defined(_TARGET_X86_) + //------------------------------------------------------------------------- + // Create a label for tracking of region protected by the monitor in synchronized methods. + // This needs to be here, rather than above where fPossibleSyncHelperCall is set, + // so the GC state vars have been updated before creating the label. + + if (fPossibleSyncHelperCall) + { + switch (helperNum) { + case CORINFO_HELP_MON_ENTER: + case CORINFO_HELP_MON_ENTER_STATIC: + noway_assert(compiler->syncStartEmitCookie == NULL); + compiler->syncStartEmitCookie = getEmitter()->emitAddLabel( + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncStartEmitCookie != NULL); + break; + case CORINFO_HELP_MON_EXIT: + case CORINFO_HELP_MON_EXIT_STATIC: + noway_assert(compiler->syncEndEmitCookie == NULL); + compiler->syncEndEmitCookie = getEmitter()->emitAddLabel( + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncEndEmitCookie != NULL); + break; + default: + break; + } + } +#endif // _TARGET_X86_ } #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) @@ -5990,12 +6113,23 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live. // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. // Therefore manually update life of varDsc->lvRegNum. - regMaskTP tempMask = genRegMask(varDsc->lvRegNum); - regSet.rsMaskVars &= ~tempMask; + regMaskTP tempMask = varDsc->lvRegMask(); + regSet.RemoveMaskVars(tempMask); gcInfo.gcMarkRegSetNpt(tempMask); - if (varDsc->lvTracked) + if (compiler->lvaIsGCTracked(varDsc)) { - VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); +#ifdef DEBUG + if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); + } + else + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); + } +#endif // DEBUG + + VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); } } @@ -6090,11 +6224,22 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). 
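For the synchronized-method bookkeeping added in genCallInstruction above: the two emit cookies bracket the region in which the monitor is held, so the GC info can describe it. Conceptually (illustrative layout, not actual JIT output):

//   call CORINFO_HELP_MON_ENTER (or _STATIC)   ; acquire the monitor
//   <syncStartEmitCookie>                      ; label recorded immediately after the call
//   ...method body: region protected by the monitor...
//   call CORINFO_HELP_MON_EXIT (or _STATIC)    ; release the monitor
//   <syncEndEmitCookie>                        ; label recorded immediately after the call,
//                                              ; capturing the GC liveness at that point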
- regSet.rsMaskVars |= genRegMask(argReg); + regSet.AddMaskVars(genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); - if (varDsc->lvTracked) + if (compiler->lvaIsGCTracked(varDsc)) { - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); +#ifdef DEBUG + if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum); + } + else + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum); + } +#endif // DEBUG + + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); } } } @@ -6290,6 +6435,475 @@ void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, } } +#if !defined(_TARGET_64BIT_) +//------------------------------------------------------------------------ +// genJumpKindsForTreeLongHi: Generate the jump types for compare +// operators of the high parts of a compare with long type operands +// on x86 +// +// Arguments: +// cmpTree - The GT_CMP node +// jmpKind - Return array of jump kinds +// jmpToTrueLabel - Return array of if the jump is going to true label +// +// Return Value: +// None. +// +void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, + emitJumpKind jmpKind[2], + bool jmpToTrueLabel[2]) +{ + jmpToTrueLabel[0] = true; + jmpToTrueLabel[1] = true; + + assert(cmpTree->OperIsCompare()); + + bool isUnsigned = (cmpTree->gtFlags & GTF_UNSIGNED) != 0; + + // For comparison of longs on x86, GT_LT, GT_LE, GT_GT, and GT_GE need two jump cases, since + // only if the hi operators are equal will we fall through. + switch (cmpTree->gtOper) + { + case GT_LT: + case GT_LE: + case GT_GT: + case GT_GE: + if (isUnsigned) + { + jmpKind[0] = EJ_ja; + } + else + { + jmpKind[0] = EJ_jg; + } + jmpKind[1] = EJ_jne; + break; + + case GT_NE: + case GT_EQ: + jmpKind[0] = EJ_jne; + jmpKind[1] = EJ_NONE; + break; + + default: + unreached(); + } +} + +//------------------------------------------------------------------------ +// genJumpKindsForTreeLongLo: Generate the jump types for compare +// operators of the low parts of a compare with long type operands +// on x86 +// +// Arguments: +// cmpTree - The GT_CMP node +// jmpKind - Return array of jump kinds +// jmpToTrueLabel - Return array of if the jump is going to true label +// +// Return Value: +// None. +// +void CodeGen::genJumpKindsForTreeLongLo(GenTreePtr cmpTree, + emitJumpKind jmpKind[2], + bool jmpToTrueLabel[2]) +{ + jmpToTrueLabel[0] = true; + jmpToTrueLabel[1] = true; + + assert(cmpTree->OperIsCompare()); + jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, true); + jmpKind[1] = EJ_NONE; +} + +//------------------------------------------------------------------------ +// genCompareLong: Generate code for comparing two longs on x86 +// +// Arguments: +// treeNode - the compare tree +// +// Return Value: +// None. +// Comments: +// For long compares, we need to compare the high parts of operands first, then the low parts. +// If the high compare is false, we do not need to compare the low parts. For less than and +// greater than, if the high compare is true, we can assume the entire compare is true. +// +// Opcode x86 equivalent Comment +// ------ -------------- ------- +// GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare +// jne label is false. +// cmp loOp1,loOp2 +// label: sete +// +// GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare +// jne label is true. 
+// cmp loOp1,loOp2 +// label: setne +// +// GT_LT cmp hiOp1,hiOp2 If hiOp1 is greater than hiOp2, the entire compare +// ja label is false. If hiOp1 is less than hiOp2, the entire +// jne label compare is true. +// cmp loOp1,loOp2 +// label: setb +// +// GT_LE cmp hiOp1,hiOp2 If hiOp1 is greater than hiOp2, the entire compare +// ja label is false. If hiOp1 is less than hiOp2, the entire +// jne label compare is true. +// cmp loOp1,loOp2 +// label: setbe +// +// GT_GT cmp hiOp1,hiOp2 If hiOp1 is greater than hiOp2, the entire compare +// ja label is true. If hiOp1 is less than hiOp2, the entire +// jne label compare is false. +// cmp loOp1,loOp2 +// label: seta +// +// GT_GE cmp hiOp1,hiOp2 If hiOp1 is greater than hiOp2, the entire compare +// ja label is true. If hiOp1 is less than hiOp2, the entire +// jne label compare is false. +// cmp loOp1,loOp2 +// label: setae +// +// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test. +void CodeGen::genCompareLong(GenTreePtr treeNode) +{ + assert(treeNode->OperIsCompare()); + + GenTreeOp *tree = treeNode->AsOp(); + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + + genConsumeOperands(tree); + + assert(varTypeIsLong(op1->TypeGet()) && varTypeIsLong(op2->TypeGet())); + regNumber targetReg = treeNode->gtRegNum; + + GenTreePtr loOp1 = op1->gtGetOp1(); + GenTreePtr hiOp1 = op1->gtGetOp2(); + GenTreePtr loOp2 = op2->gtGetOp1(); + GenTreePtr hiOp2 = op2->gtGetOp2(); + + // Create compare for the high parts + instruction ins = INS_cmp; + var_types cmpType = TYP_INT; + emitAttr cmpAttr = emitTypeSize(cmpType); + + // Emit the compare instruction + getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2); + + // Generate the first jump for the high compare + emitJumpKind jumpKind[2]; + bool branchToTrueLabel[2]; + genJumpKindsForTreeLongHi(tree, jumpKind, branchToTrueLabel); + + BasicBlock* label = genCreateTempLabel(); + inst_JMP(jumpKind[0], label); + + // Generate the second jump for LE, LT, GT, and GE. We only do the lower compare if + // the hi parts are equal + if (jumpKind[1] != EJ_NONE) + { + assert(branchToTrueLabel[1]); + inst_JMP(jumpKind[1], label); + } + + // Now create compare for low parts + ins = INS_cmp; + cmpType = TYP_INT; + cmpAttr = emitTypeSize(cmpType); + + // Emit the comparison + getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2); + + // Define the label for hi jump target here. If we have jumped here, we want to set + // the target register based on the jump kind of the lower half (the actual compare + // type). If we have fallen through, then we are doing a normal int compare for the + // lower parts + + genDefineTempLabel(label); + if (targetReg != REG_NA) + { + emitJumpKind jumpKindLo[2]; + bool branchToTrueLabelLo[2]; + + // The low set must be unsigned + genJumpKindsForTreeLongLo(tree, jumpKindLo, branchToTrueLabelLo); + inst_SET(jumpKindLo[0], targetReg); + + // Set the higher bytes to 0 + inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); + genProduceReg(tree); + } +} +#endif //!defined(_TARGET_64BIT_) + +//------------------------------------------------------------------------ +// genCompareFloat: Generate code for comparing two floating point values +// +// Arguments: +// treeNode - the compare tree +// +// Return Value: +// None. +// Comments: +// SSE2 instruction ucomis[s|d] is performs unordered comparison and +// updates rFLAGS register as follows. 
+// Result of compare ZF PF CF +// ----------------- ------------ +// Unordered 1 1 1 <-- this result implies one of operands of compare is a NAN. +// Greater 0 0 0 +// Less Than 0 0 1 +// Equal 1 0 0 +// +// From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform +// unordered comparison of floating point values. That is *.UN comparisons result in true when +// one of the operands is a NaN whereas ordered comparisons results in false. +// +// Opcode Amd64 equivalent Comment +// ------ ----------------- -------- +// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above table. +// jb +// +// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b +// ja +// +// BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered +// jb +// +// BGT(a, b) ucomis[s|d] a, b branch if a>b +// ja +// +// BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered +// jbe +// +// BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which mean b>=a or a<=b +// jae +// +// BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered +// jbe +// +// BGE(a,b) ucomis[s|d] a, b branch if a>=b +// jae +// +// BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec. +// je This case is given for completeness, in case if JIT generates such +// a gentree internally. +// +// BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b. +// jpe L1 +// je <true label> +// L1: +// +// BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is +// jne given for completeness, in case if JIT generates such a gentree +// internally. +// +// BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b +// jpe <true label> +// jne <true label> +// +// As we can see from the above equalities that the operands of a compare operator need to be +// reveresed in case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN. +void CodeGen::genCompareFloat(GenTreePtr treeNode) +{ + assert(treeNode->OperIsCompare()); + + GenTreeOp *tree = treeNode->AsOp(); + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + + genConsumeOperands(tree); + + assert(varTypeIsFloating(op1Type)); + assert(op1Type == op2Type); + + regNumber targetReg = treeNode->gtRegNum; + instruction ins; + emitAttr cmpAttr; + + bool reverseOps; + if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0) + { + // Unordered comparison case + reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE); + } + else + { + reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE); + } + + if (reverseOps) + { + GenTreePtr tmp = op1; + op1 = op2; + op2 = tmp; + } + + ins = ins_FloatCompare(op1Type); + cmpAttr = emitTypeSize(op1Type); + + getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2); + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCompareInt: Generate code for comparing ints or, on amd64, longs. +// +// Arguments: +// treeNode - the compare tree +// +// Return Value: +// None. 
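The operand-reversal rules from the table above, condensed into one place. This is an illustrative sketch (RelOp, FloatCmpPlan and PlanFloatCompare are not JIT names); "ja"/"jb"/"jae"/"jbe" are the branches emitted after ucomis[s|d].

enum class RelOp { LT, LE, GT, GE };
struct FloatCmpPlan { bool swapOperands; const char* branch; };

// Mirrors genCompareFloat: unordered (*.UN) compares swap operands for GT/GE,
// ordered compares swap operands for LT/LE, and the branch is chosen so that a
// NaN operand (CF=1, unordered result) takes the branch only in the *.UN forms.
FloatCmpPlan PlanFloatCompare(RelOp oper, bool unordered)
{
    switch (oper)
    {
    case RelOp::LT: return unordered ? FloatCmpPlan{ false, "jb"  }   // ucomis a,b ; a<b or NaN
                                     : FloatCmpPlan{ true,  "ja"  };  // ucomis b,a ; b>a
    case RelOp::LE: return unordered ? FloatCmpPlan{ false, "jbe" }
                                     : FloatCmpPlan{ true,  "jae" };
    case RelOp::GT: return unordered ? FloatCmpPlan{ true,  "jb"  }   // ucomis b,a ; b<a or NaN
                                     : FloatCmpPlan{ false, "ja"  };
    default:        return unordered ? FloatCmpPlan{ true,  "jbe" }   // GE
                                     : FloatCmpPlan{ false, "jae" };
    }
}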
+void CodeGen::genCompareInt(GenTreePtr treeNode) +{ + assert(treeNode->OperIsCompare()); + + GenTreeOp *tree = treeNode->AsOp(); + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + + genConsumeOperands(tree); + + instruction ins; + emitAttr cmpAttr; + + regNumber targetReg = treeNode->gtRegNum; + assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm + assert(!varTypeIsFloating(op2Type)); + +#ifdef _TARGET_X86_ + assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type)); +#endif // _TARGET_X86_ + + // By default we use an int32 sized cmp instruction + // + ins = INS_cmp; + var_types cmpType = TYP_INT; + + // In the if/then/else statement below we may change the + // 'cmpType' and/or 'ins' to generate a smaller instruction + + // Are we comparing two values that are the same size? + // + if (genTypeSize(op1Type) == genTypeSize(op2Type)) + { + if (op1Type == op2Type) + { + // If both types are exactly the same we can use that type + cmpType = op1Type; + } + else if (genTypeSize(op1Type) == 8) + { + // If we have two different int64 types we need to use a long compare + cmpType = TYP_LONG; + } + + cmpAttr = emitTypeSize(cmpType); + } + else // Here we know that (op1Type != op2Type) + { + // Do we have a short compare against a constant in op2? + // + // We checked for this case in LowerCmp() and if we can perform a small + // compare immediate we labeled this compare with a GTF_RELOP_SMALL + // and for unsigned small non-equality compares the GTF_UNSIGNED flag. + // + if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0)) + { + assert(varTypeIsSmall(op1Type)); + cmpType = op1Type; + } +#ifdef _TARGET_AMD64_ + else // compare two different sized operands + { + // For this case we don't want any memory operands, only registers or immediates + // + assert(!op1->isContainedMemoryOp()); + assert(!op2->isContainedMemoryOp()); + + // Check for the case where one operand is an int64 type + // Lower should have placed 32-bit operand in a register + // for signed comparisons we will sign extend the 32-bit value in place. + // + bool op1Is64Bit = (genTypeSize(op1Type) == 8); + bool op2Is64Bit = (genTypeSize(op2Type) == 8); + if (op1Is64Bit) + { + cmpType = TYP_LONG; + if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit) + { + assert(op2->gtRegNum != REG_NA); + inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type); + } + } + else if (op2Is64Bit) + { + cmpType = TYP_LONG; + if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit) + { + assert(op1->gtRegNum != REG_NA); + } + } + } +#endif // _TARGET_AMD64_ + + cmpAttr = emitTypeSize(cmpType); + } + + // See if we can generate a "test" instruction instead of a "cmp". + // For this to generate the correct conditional branch we must have + // a compare against zero. + // + if (op2->IsZero()) + { + if (op1->isContained()) + { + // op1 can be a contained memory op + // or the special contained GT_AND that we created in Lowering::LowerCmp() + // + if ((op1->OperGet() == GT_AND)) + { + noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed()); + + ins = INS_test; // we will generate "test andOp1, andOp2CnsVal" + op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1 + op1 = op1->gtOp.gtOp1; // overwrite op1 + + if (op1->isContainedMemoryOp()) + { + // use the size andOp1 if it is a contained memoryop. 
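Two source-level shapes that the compare-against-zero path above turns into "test" instead of "cmp" (x86 forms in the comments, purely illustrative):

bool TestExamples(int* p, int x)
{
    bool maskClear = ((*p & 0x40) == 0);  // contained GT_AND: test dword ptr [p], 0x40
    bool isZero    = (x == 0);            // register case:    test <x reg>, <x reg>
    return maskClear && isZero;
}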
+ cmpAttr = emitTypeSize(op1->TypeGet()); + } + // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2); + } + } + else // op1 is not contained thus it must be in a register + { + ins = INS_test; + op2 = op1; // we will generate "test reg1,reg1" + // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2); + } + } + + getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2); + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } +} // Generate code to materialize a condition into a register // (the condition codes must already have been appropriately set) @@ -6381,6 +6995,7 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree) // Neither the source nor target type can be a floating point type. // // TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register. +// TODO: refactor to use getCastDescription // void CodeGen::genIntToIntCast(GenTreePtr treeNode) { @@ -7108,7 +7723,7 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() // None // // Assumptions: -// i) tree oper is one of GT_NEG or GT_MATH Abs() +// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs() // ii) tree type is floating point type. // iii) caller of this routine needs to call genProduceReg() void @@ -7151,8 +7766,8 @@ CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode) } break; - case GT_MATH: - assert(treeNode->gtMath.gtMathFN == CORINFO_INTRINSIC_Abs); + case GT_INTRINSIC: + assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs); // Abs(x) = set sign-bit to zero // Abs(f) = f & 0x7fffffff @@ -7216,19 +7831,19 @@ CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode) } //--------------------------------------------------------------------- -// genMathIntrinsic - generate code for a given math intrinsic +// genIntrinsic - generate code for a given intrinsic // // Arguments -// treeNode - the GT_MATH node +// treeNode - the GT_INTRINSIC node // // Return value: // None // void -CodeGen::genMathIntrinsic(GenTreePtr treeNode) +CodeGen::genIntrinsic(GenTreePtr treeNode) { // Right now only Sqrt/Abs are treated as math intrinsics. - switch(treeNode->gtMath.gtMathFN) + switch(treeNode->gtIntrinsic.gtIntrinsicId) { case CORINFO_INTRINSIC_Sqrt: noway_assert(treeNode->TypeGet() == TYP_DOUBLE); @@ -7241,7 +7856,7 @@ CodeGen::genMathIntrinsic(GenTreePtr treeNode) break; default: - assert(!"genMathIntrinsic: Unsupported math intrinsic"); + assert(!"genIntrinsic: Unsupported intrinsic"); unreached(); } @@ -7406,8 +8021,7 @@ CodeGen::genPutArgStk(GenTreePtr treeNode) // None // void -CodeGen::genPutStructArgStk(GenTreePtr treeNode - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned baseVarNum)) +CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum) { assert(treeNode->OperGet() == GT_PUTARG_STK); assert(baseVarNum != BAD_VAR_NUM); @@ -7750,10 +8364,8 @@ void CodeGen::genEmitHelperCall(unsigned helper, #endif // !LEGACY_BACKEND ) { - void * addr = NULL, *pAddr = NULL; -#ifdef LEGACY_BACKEND - regNumber callTargetReg = REG_EAX; -#endif // LEGACY_BACKEND + void* addr = nullptr; + void* pAddr = nullptr; emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); @@ -7763,17 +8375,23 @@ void CodeGen::genEmitHelperCall(unsigned helper, if (!addr) { assert(pAddr != nullptr); - if (genAddrShouldUsePCRel((size_t)pAddr)) + + // Absolute indirect call addr + // Note: Order of checks is important. 
First always check for pc-relative and next + // zero-relative. Because the former encoding is 1-byte smaller than the latter. + if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) || + genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr)) { - // generate call whose target is specified by PC-relative 32-bit offset. + // generate call whose target is specified by 32-bit offset relative to PC or zero. callType = emitter::EC_FUNC_TOKEN_INDIR; addr = pAddr; } else { #ifdef _TARGET_AMD64_ - // If this address cannot be encoded as PC-relative 32-bit offset, load it into REG_HELPER_CALL_TARGET - // and use register indirect addressing mode to make the call. + // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero, + // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to + // make the call. // mov reg, addr // call [reg] @@ -7793,6 +8411,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE); } #endif + callTarget = callTargetReg; CodeGen::genSetRegToIcon(callTarget, (ssize_t) pAddr, TYP_I_IMPL); callType = emitter::EC_INDIR_ARD; diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index b54657202a..d56a79a203 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -24,6 +24,16 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" #endif // !LEGACY_BACKEND +#include "jittelemetry.h" + +#if defined(DEBUG) +// Column settings for COMPLUS_JitDumpIR. We could(should) make these programmable. +#define COLUMN_OPCODE 30 +#define COLUMN_OPERANDS (COLUMN_OPCODE + 25) +#define COLUMN_KINDS 110 +#define COLUMN_FLAGS (COLUMN_KINDS + 32) +#endif + #if defined(DEBUG) || MEASURE_INLINING unsigned Compiler::jitTotalMethodCompiled = 0; unsigned Compiler::jitTotalMethodInlined = 0; @@ -373,6 +383,151 @@ histo loopExitCountTable(DefaultAllocator::Singleton(), loopExitCountBucke #endif // COUNT_LOOPS +//------------------------------------------------------------------------ +// argOrReturnTypeForStruct: Get the "primitive" type, if any, that is used to pass or return +// values of the given struct type. +// +// Arguments: +// clsHnd - the handle for the struct type +// forReturn - true if we asking for this in a GT_RETURN context +// false if we are asking for this in a parameter passing context +// +// Return Value: +// The primitive type used to pass or return the struct, if applicable, or +// TYP_UNKNOWN otherwise. +// +// Assumptions: +// The given class handle must be for a value type (struct). +// +// Notes: +// Most of the work is done by the method of the same name that takes the +// size of the struct. + + +var_types Compiler::argOrReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, bool forReturn) +{ + unsigned size = info.compCompHnd->getClassSize(clsHnd); + return argOrReturnTypeForStruct(size, clsHnd, forReturn); +} + +//------------------------------------------------------------------------ +// argOrReturnTypeForStruct: Get the "primitive" type, if any, that is used to pass or return +// values of the given struct type. +// +// Arguments: +// size - the size of the struct type +// clsHnd - the handle for the struct type +// forReturn - true if we asking for this in a GT_RETURN context +// false if we are asking for this in a parameter passing context +// +// Return Value: +// The primitive type used to pass or return the struct, if applicable, or +// TYP_UNKNOWN otherwise. 
+// +// Assumptions: +// The size must be the size of the given type. +// The given class handle must be for a value type (struct). +// +// Notes: +// Some callers call into this method directly, instead of the above method, +// when they have already determined the size. +// This is to avoid a redundant call across the JIT/EE interface. + +var_types Compiler::argOrReturnTypeForStruct(unsigned size, CORINFO_CLASS_HANDLE clsHnd, bool forReturn) +{ + BYTE gcPtr = 0; + var_types useType = TYP_UNKNOWN; + + switch (size) + { + case 1: + useType = TYP_BYTE; + break; + + case 2: + useType = TYP_SHORT; + break; + +#ifndef _TARGET_XARCH_ + case 3: + useType = TYP_INT; + break; +#endif // _TARGET_AMD64_ + +#ifdef _TARGET_64BIT_ + case 4: + useType = TYP_INT; + break; +#endif // _TARGET_64BIT_ + + // Pointer size +#ifdef _TARGET_64BIT_ +#ifndef _TARGET_AMD64_ + case 5: + case 6: + case 7: +#endif // _TARGET_AMD64_ + case 8: +#else // !_TARGET_64BIT_ + case 4: +#endif // !_TARGET_64BIT_ + info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); + if (gcPtr == TYPE_GC_NONE) + { + useType = TYP_I_IMPL; + } + else if (gcPtr == TYPE_GC_REF) + { + useType = TYP_REF; + } + else if (gcPtr == TYPE_GC_BYREF) + { + useType = TYP_BYREF; + } + else + { + assert(!"Bad value of CorInfoGCType"); + } + break; + + default: +#if FEATURE_MULTIREG_STRUCT_RET + if (forReturn) + { + if (size <= MAX_RET_MULTIREG_BYTES) + { +#ifdef _TARGET_ARM64_ + assert(size > TARGET_POINTER_SIZE); + + // For structs that are 9 to 16 bytes in size set useType to TYP_STRUCT, + // as this means a 9-16 byte struct value in two registers + // + useType = TYP_STRUCT; +#endif // _TARGET_ARM64_ + } + } +#endif // FEATURE_MULTIREG_STRUCT_RET + +#if FEATURE_MULTIREG_STRUCT_ARGS + if (!forReturn) + { + if (size <= MAX_PASS_MULTIREG_BYTES) + { +#ifdef _TARGET_ARM64_ + assert(size > TARGET_POINTER_SIZE); + + // For structs that are 9 to 16 bytes in size set useType to TYP_STRUCT, + // as this means a 9-16 byte struct value in two registers + // + useType = TYP_STRUCT; +#endif // _TARGET_ARM64_ + } + } +#endif // FEATURE_MULTIREG_STRUCT_ARGS + break; + } + return useType; +} /***************************************************************************** * variables to keep track of how many iterations we go in a dataflow pass @@ -476,7 +631,7 @@ void Compiler::compStartup() /* static */ void Compiler::compShutdown() -{ +{ #ifdef ALT_JIT if (s_pAltJitExcludeAssembliesList != nullptr) { @@ -931,9 +1086,7 @@ void Compiler::compDisplayStaticSizes(FILE* fout) fprintf(fout, "Size of GenTreeCmpXchg = %3u\n", sizeof(GenTreeCmpXchg)); fprintf(fout, "Size of GenTreeFptrVal = %3u\n", sizeof(GenTreeFptrVal)); fprintf(fout, "Size of GenTreeQmark = %3u\n", sizeof(GenTreeQmark)); -#if INLINE_MATH - fprintf(fout, "Size of GenTreeMath = %3u\n", sizeof(GenTreeMath)); -#endif // INLINE_MATH + fprintf(fout, "Size of GenTreeIntrinsic = %3u\n", sizeof(GenTreeIntrinsic)); fprintf(fout, "Size of GenTreeIndex = %3u\n", sizeof(GenTreeIndex)); fprintf(fout, "Size of GenTreeArrLen = %3u\n", sizeof(GenTreeArrLen)); fprintf(fout, "Size of GenTreeBoundsChk = %3u\n", sizeof(GenTreeBoundsChk)); @@ -1049,6 +1202,7 @@ void Compiler::compInit(norls_allocator * pAlloc, InlineInfo * in eeInfoInitialized = false; + compDoAggressiveInlining = false; if (compIsForInlining()) { @@ -1079,6 +1233,20 @@ void Compiler::compInit(norls_allocator * pAlloc, InlineInfo * in compQMarks = new (this, CMK_Unknown) ExpandArrayStack<GenTreePtr>(getAllocator()); } +#ifdef FEATURE_TRACELOGGING + // Make sure JIT 
telemetry is initialized as soon as allocations can be made + // but no later than a point where noway_asserts can be thrown. + // 1. JIT telemetry could allocate some objects internally. + // 2. NowayAsserts are tracked through telemetry. + // Note: JIT telemetry could gather data when compiler is not fully initialized. + // So you have to initialize the compiler variables you use for telemetry. + assert((unsigned) PHASE_PRE_IMPORT == 0); + previousCompletedPhase = PHASE_PRE_IMPORT; + info.compILCodeSize = 0; + info.compMethodHnd = nullptr; + compJitTelemetry.Initialize(this); +#endif + #ifdef DEBUG bRangeAllowStress = false; #endif @@ -1165,8 +1333,8 @@ void Compiler::compInit(norls_allocator * pAlloc, InlineInfo * in compCodeGenDone = false; compRegSetCheckLevel = 0; opts.compMinOptsIsUsed = false; - opts.compMinOptsIsSet = false; #endif + opts.compMinOptsIsSet = false; //Used by fgFindJumpTargets for inlining heuristics. opts.instrCount = 0; @@ -1602,9 +1770,6 @@ void Compiler::compSetProcessor() #ifdef FEATURE_AVX_SUPPORT // COMPLUS_EnableAVX can be used to disable using AVX if available on a target machine. // Note that FEATURE_AVX_SUPPORT is not enabled for ctpjit -#ifdef RYUJIT_CTPBUILD -#error Cannot support AVX in a CTP JIT -#endif // RYUJIT_CTPBUILD opts.compCanUseAVX = false; if (((compileFlags & CORJIT_FLG_PREJIT) == 0) && ((compileFlags & CORJIT_FLG_USE_AVX2) != 0)) @@ -1678,8 +1843,15 @@ bool Compiler::compIsFullTrust() } -bool Compiler::compShouldThrowOnNoway() +bool Compiler::compShouldThrowOnNoway( +#ifdef FEATURE_TRACELOGGING + const char* filename, unsigned line +#endif +) { +#ifdef FEATURE_TRACELOGGING + compJitTelemetry.NotifyNowayAssert(filename, line); +#endif // In min opts, we don't want the noway assert to go through the exception // path. Instead we want it to just silently go through codegen for // compat reasons. 
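Looking back at the argOrReturnTypeForStruct helper shown earlier: a simplified sketch of its size-to-primitive-type mapping. PrimType and PrimTypeForStruct are hypothetical names; the real code also has per-target special cases and multi-register struct handling.

enum class PrimType { Byte, Short, Int, Pointer, ObjectRef, ByRef, Unknown };

// Simplified: map a struct's size to the primitive type used to pass or return it.
// Pointer-sized structs pick their type from the GC layout, as in the switch above.
PrimType PrimTypeForStruct(unsigned size, bool holdsGcRef, bool holdsByRef)
{
    switch (size)
    {
    case 1:  return PrimType::Byte;
    case 2:  return PrimType::Short;
    case 4:  return PrimType::Int;       // pointer-sized on 32-bit targets (GC layout applies there)
    case 8:                              // pointer-sized on 64-bit targets
        if (holdsGcRef) return PrimType::ObjectRef;
        if (holdsByRef) return PrimType::ByRef;
        return PrimType::Pointer;
    default: return PrimType::Unknown;   // left as a real struct (possibly multi-register)
    }
}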
@@ -1911,8 +2083,31 @@ void Compiler::compInitOptions(unsigned compileFlags) } bool verboseDump = false; + bool dumpIR = false; + bool dumpIRTypes = false; + bool dumpIRLocals = false; + bool dumpIRRegs = false; + bool dumpIRSsa = false; + bool dumpIRValnums = false; + bool dumpIRCosts = false; + bool dumpIRFlags = false; + bool dumpIRKinds = false; + bool dumpIRNodes = false; + bool dumpIRNoLists = false; + bool dumpIRNoLeafs = false; + bool dumpIRNoStmts = false; + bool dumpIRTrees = false; + bool dumpIRLinear = false; + bool dumpIRDataflow = false; + bool dumpIRBlockHeaders = false; + bool dumpIRExit = false; + LPCWSTR dumpIRPhase = nullptr; + LPCWSTR dumpIRFormat = nullptr; + if (!altJitConfig || opts.altJit) { + LPCWSTR dumpIRFormat = nullptr; + if (opts.eeFlags & CORJIT_FLG_PREJIT) { static ConfigMethodSet fNgenDump; @@ -1925,6 +2120,23 @@ void Compiler::compInitOptions(unsigned compileFlags) unsigned ngenHashDumpVal = (unsigned) fNgenHashDump.val(CLRConfig::INTERNAL_NgenHashDump); if ((ngenHashDumpVal != (DWORD)-1) && (ngenHashDumpVal == info.compMethodHash())) verboseDump = true; + + static ConfigMethodSet fNgenDumpIR; + fNgenDumpIR.ensureInit(CLRConfig::INTERNAL_NgenDumpIR); + + if (fNgenDumpIR.contains(info.compMethodName, info.compClassName, info.compMethodInfo->args.pSig)) + dumpIR = true; + + static ConfigDWORD fNgenHashDumpIR; + unsigned ngenHashDumpIRVal = (unsigned) fNgenHashDumpIR.val(CLRConfig::INTERNAL_NgenHashDumpIR); + if ((ngenHashDumpIRVal != (DWORD)-1) && (ngenHashDumpIRVal == info.compMethodHash())) + dumpIR = true; + + static ConfigString ngenDumpIRFormat; + dumpIRFormat = ngenDumpIRFormat.val(CLRConfig::INTERNAL_NgenDumpIRFormat); + + static ConfigString ngenDumpIRPhase; + dumpIRPhase = ngenDumpIRPhase.val(CLRConfig::INTERNAL_NgenDumpIRPhase); } else { @@ -1938,6 +2150,224 @@ void Compiler::compInitOptions(unsigned compileFlags) unsigned jitHashDumpVal = (unsigned) fJitHashDump.val(CLRConfig::INTERNAL_JitHashDump); if ((jitHashDumpVal != (DWORD)-1) && (jitHashDumpVal == info.compMethodHash())) verboseDump = true; + + static ConfigMethodSet fJitDumpIR; + fJitDumpIR.ensureInit(CLRConfig::INTERNAL_JitDumpIR); + + if (fJitDumpIR.contains(info.compMethodName, info.compClassName, info.compMethodInfo->args.pSig)) + dumpIR = true; + + static ConfigDWORD fJitHashDumpIR; + unsigned jitHashDumpIRVal = (unsigned) fJitHashDumpIR.val(CLRConfig::INTERNAL_JitHashDumpIR); + if ((jitHashDumpIRVal != (DWORD)-1) && (jitHashDumpIRVal == info.compMethodHash())) + dumpIR = true; + + static ConfigString jitDumpIRFormat; + dumpIRFormat = jitDumpIRFormat.val(CLRConfig::INTERNAL_JitDumpIRFormat); + + static ConfigString jitDumpIRPhase; + dumpIRPhase = jitDumpIRPhase.val(CLRConfig::INTERNAL_JitDumpIRPhase); + } + + if (dumpIRPhase == nullptr) + { + dumpIRPhase = W("*"); + } + + this->dumpIRPhase = dumpIRPhase; + + if (dumpIRFormat != nullptr) + { + this->dumpIRFormat = dumpIRFormat; + } + + dumpIRTrees = false; + dumpIRLinear = true; + if (dumpIRFormat != nullptr) + { + for (LPCWSTR p = dumpIRFormat; (*p != 0); ) + { + for (; (*p != 0); p++) + { + if (*p != L' ') + break; + } + + if (*p == 0) + { + break; + } + + static bool dumpedHelp = false; + + if ((*p == L'?') && (!dumpedHelp)) + { + printf("*******************************************************************************\n"); + printf("\n"); + dFormatIR(); + printf("\n"); + printf("\n"); + printf("Available specifiers (comma separated):\n"); + printf("\n"); + printf("? 
dump out value of COMPLUS_JitDumpIRFormat and this list of values\n"); + printf("\n"); + printf("linear linear IR dump (default)\n"); + printf("tree tree IR dump (traditional)\n"); + printf("mixed intermingle tree dump with linear IR dump\n"); + printf("\n"); + printf("dataflow use data flow form of linear IR dump\n"); + printf("structural use structural form of linear IR dump\n"); + printf("all implies structural, include everything\n"); + printf("\n"); + printf("kinds include tree node kinds in dump, example: \"kinds=[LEAF][LOCAL]\"\n"); + printf("flags include tree node flags in dump, example: \"flags=[CALL][GLOB_REF]\" \n"); + printf("types includes tree node types in dump, example: \".int\"\n"); + printf("locals include local numbers and tracking numbers in dump, example: \"(V3,T1)\"\n"); + printf("regs include register assignments in dump, example: \"(rdx)\"\n"); + printf("ssa include SSA numbers in dump, example: \"<d:3>\" or \"<u:3>\"\n"); + printf("valnums include Value numbers in dump, example: \"<v:$c4>\" or \"<v:$c4,$c5>\"\n"); + printf("\n"); + printf("nolist exclude GT_LIST nodes from dump\n"); + printf("noleafs exclude LEAF nodes from dump (fold into operations)\n"); + printf("nostmts exclude GT_STMTS from dump (unless required by dependencies)\n"); + printf("\n"); + printf("blkhdrs include block headers\n"); + printf("exit exit program after last phase dump (used with single method)\n"); + printf("\n"); + printf("*******************************************************************************\n"); + dumpedHelp = true; + } + + if (wcsncmp(p, W("types"), 5) == 0) + { + dumpIRTypes = true; + } + + if (wcsncmp(p, W("locals"), 6) == 0) + { + dumpIRLocals = true; + } + + if (wcsncmp(p, W("regs"), 4) == 0) + { + dumpIRRegs = true; + } + + if (wcsncmp(p, W("ssa"), 3) == 0) + { + dumpIRSsa = true; + } + + if (wcsncmp(p, W("valnums"), 7) == 0) + { + dumpIRValnums = true; + } + + if (wcsncmp(p, W("costs"), 5) == 0) + { + dumpIRCosts = true; + } + + if (wcsncmp(p, W("flags"), 5) == 0) + { + dumpIRFlags = true; + } + + if (wcsncmp(p, W("kinds"), 5) == 0) + { + dumpIRKinds = true; + } + + if (wcsncmp(p, W("nodes"), 5) == 0) + { + dumpIRNodes = true; + } + + if (wcsncmp(p, W("exit"), 4) == 0) + { + dumpIRExit = true; + } + + if (wcsncmp(p, W("nolists"), 7) == 0) + { + dumpIRNoLists = true; + } + + if (wcsncmp(p, W("noleafs"), 7) == 0) + { + dumpIRNoLeafs = true; + } + + if (wcsncmp(p, W("nostmts"), 7) == 0) + { + dumpIRNoStmts = true; + } + + if (wcsncmp(p, W("trees"), 5) == 0) + { + dumpIRTrees = true; + dumpIRLinear = false; + } + + if (wcsncmp(p, W("structural"), 10) == 0) + { + dumpIRLinear = true; + dumpIRNoStmts = false; + dumpIRNoLeafs = false; + dumpIRNoLists = false; + } + + if (wcsncmp(p, W("all"), 3) == 0) + { + dumpIRLinear = true; + dumpIRKinds = true; + dumpIRFlags = true; + dumpIRTypes = true; + dumpIRLocals = true; + dumpIRRegs = true; + dumpIRSsa = true; + dumpIRValnums = true; + dumpIRCosts = true; + dumpIRNoStmts = false; + dumpIRNoLeafs = false; + dumpIRNoLists = false; + } + + if (wcsncmp(p, W("linear"), 6) == 0) + { + dumpIRTrees = false; + dumpIRLinear = true; + } + + if (wcsncmp(p, W("mixed"), 5) == 0) + { + dumpIRTrees = true; + dumpIRLinear = true; + } + + if (wcsncmp(p, W("dataflow"), 8) == 0) + { + dumpIRDataflow = true; + dumpIRNoLeafs = true; + dumpIRNoLists = true; + dumpIRNoStmts = true; + } + + if (wcsncmp(p, W("blkhdrs"), 7) == 0) + { + dumpIRBlockHeaders = true; + } + + + for (; (*p != 0); p++) + { + if (*p == L',') + { + p++; + break; + } + } + } } } @@ 
-1946,17 +2376,102 @@ void Compiler::compInitOptions(unsigned compileFlags) verbose = true; } + if (dumpIR) + { + this->dumpIR = true; + } + + if (dumpIRTypes) + { + this->dumpIRTypes = true; + } + + if (dumpIRLocals) + { + this->dumpIRLocals = true; + } + + if (dumpIRRegs) + { + this->dumpIRRegs = true; + } + + if (dumpIRSsa) + { + this->dumpIRSsa = true; + } + + if (dumpIRValnums) + { + this->dumpIRValnums = true; + } + + if (dumpIRCosts) + { + this->dumpIRCosts = true; + } + + if (dumpIRFlags) + { + this->dumpIRFlags = true; + } + + if (dumpIRKinds) + { + this->dumpIRKinds = true; + } + + if (dumpIRNodes) + { + this->dumpIRNodes = true; + } + + if (dumpIRNoLists) + { + this->dumpIRNoLists = true; + } + + if (dumpIRNoLeafs) + { + this->dumpIRNoLeafs = true; + } + + if (dumpIRNoLeafs && dumpIRDataflow) + { + this->dumpIRDataflow = true; + } + + if (dumpIRNoStmts) + { + this->dumpIRNoStmts = true; + } + + if (dumpIRTrees) + { + this->dumpIRTrees = true; + } + + if (dumpIRLinear) + { + this->dumpIRLinear = true; + } + + if (dumpIRBlockHeaders) + { + this->dumpIRBlockHeaders = true; + } + + if (dumpIRExit) + { + this->dumpIRExit = true; + } + #endif // DEBUG #ifdef FEATURE_SIMD #ifdef _TARGET_AMD64_ // Minimum bar for availing SIMD benefits is SSE2 on AMD64. -#ifdef RYUJIT_CTPBUILD - static ConfigDWORD fFeatureSIMD; - featureSIMD = (opts.compCanUseSSE2 && (fFeatureSIMD.val(CLRConfig::EXTERNAL_FeatureSIMD) != 0)); -#else // !RYUJIT_CTPBUILD featureSIMD = ((opts.eeFlags & CORJIT_FLG_FEATURE_SIMD) != 0); -#endif // !RYUJIT_CTPBUILD #endif // _TARGET_AMD64_ #endif // FEATURE_SIMD @@ -1983,6 +2498,7 @@ void Compiler::compInitOptions(unsigned compileFlags) #if FEATURE_TAILCALL_OPT // By default opportunistic tail call optimization is enabled opts.compTailCallOpt = true; + opts.compTailCallLoopOpt = true; #endif #ifdef DEBUG @@ -2225,6 +2741,11 @@ void Compiler::compInitOptions(unsigned compileFlags) { opts.compTailCallOpt = (UINT)_wtoi(strTailCallOpt) != 0; } + static ConfigDWORD fTailCallLoopOpt; + if (fTailCallLoopOpt.val(CLRConfig::EXTERNAL_TailCallLoopOpt) == 0) + { + opts.compTailCallLoopOpt = false; + } #endif opts.compMustInlinePInvokeCalli = (opts.eeFlags & CORJIT_FLG_IL_STUB) ? true : false; @@ -2242,6 +2763,14 @@ void Compiler::compInitOptions(unsigned compileFlags) opts.compReloc = (opts.eeFlags & CORJIT_FLG_RELOC) ? true : false; #endif +#ifdef DEBUG +#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) + // Whether encoding of absolute addr as PC-rel offset is enabled in RyuJIT + static ConfigDWORD fEnablePCRelAddr; + opts.compEnablePCRelAddr = (fEnablePCRelAddr.val(CLRConfig::INTERNAL_JitEnablePCRelAddr) != 0); +#endif +#endif //DEBUG + opts.compProcedureSplitting = (opts.eeFlags & CORJIT_FLG_PROCSPLIT) ? true : false; #ifdef _TARGET_ARM64_ @@ -2389,13 +2918,10 @@ void Compiler::compInitOptions(unsigned compileFlags) void JitDump(const char* pcFormat, ...) { - if (GetTlsCompiler()->verbose) - { - va_list lst; - va_start(lst, pcFormat); - logf_stdout(pcFormat, lst); - va_end(lst); - } + va_list lst; + va_start(lst, pcFormat); + logf_stdout(pcFormat, lst); + va_end(lst); } bool Compiler::compJitHaltMethod() @@ -2992,6 +3518,7 @@ void Compiler::compCompile(void * * methodCodePtr, unsigned compileFlags) { hashBv::Init(this); + VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal()); /* The temp holding the secret stub argument is used by fgImport() when importing the intrinsic. 
*/ @@ -3238,6 +3765,7 @@ void Compiler::compCompile(void * * methodCodePtr, if (!opts.MinOpts() && !opts.compDbgCode) { bool doSsa = true; + bool doEarlyProp = true; bool doValueNum = true; bool doLoopHoisting = true; bool doCopyProp = true; @@ -3247,6 +3775,7 @@ void Compiler::compCompile(void * * methodCodePtr, #ifdef DEBUG static ConfigDWORD fJitDoOptConfig[6]; doSsa = (fJitDoOptConfig[0].val(CLRConfig::INTERNAL_JitDoSsa) != 0); + doEarlyProp = doSsa && (fJitDoOptConfig[1].val(CLRConfig::INTERNAL_JitDoEarlyProp) != 0); doValueNum = doSsa && (fJitDoOptConfig[1].val(CLRConfig::INTERNAL_JitDoValueNumber) != 0); doLoopHoisting = doValueNum && (fJitDoOptConfig[2].val(CLRConfig::INTERNAL_JitDoLoopHoisting) != 0); doCopyProp = doValueNum && (fJitDoOptConfig[3].val(CLRConfig::INTERNAL_JitDoCopyProp) != 0); @@ -3260,6 +3789,13 @@ void Compiler::compCompile(void * * methodCodePtr, EndPhase(PHASE_BUILD_SSA); } + if (doEarlyProp) + { + /* Propagate array length and rewrite getType() method call */ + optEarlyProp(); + EndPhase(PHASE_EARLY_PROP); + } + if (doValueNum) { fgValueNumber(); @@ -3296,7 +3832,6 @@ void Compiler::compCompile(void * * methodCodePtr, if (doRangeAnalysis) { /* Optimize array index range checks */ - // optOptimizeIndexChecks(); RangeCheck rc(this); rc.OptimizeRangeChecks(); EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS); @@ -3315,14 +3850,15 @@ void Compiler::compCompile(void * * methodCodePtr, } } +#ifdef _TARGET_AMD64_ + // Check if we need to add the Quirk for the PPP backward compat issue + compQuirkForPPPflag = compQuirkForPPP(); +#endif + fgDetermineFirstColdBlock(); EndPhase(PHASE_DETERMINE_FIRST_COLD_BLOCK); #ifdef DEBUG - fgDumpXmlFlowGraph(); -#endif - -#ifdef DEBUG fgDebugCheckLinks(compStressCompile(STRESS_REMORPH_TREES, 50)); #endif @@ -3450,6 +3986,10 @@ void Compiler::compCompile(void * * methodCodePtr, RecordSqmStateAtEndOfCompilation(); #endif // FEATURE_CLRSQM +#ifdef FEATURE_TRACELOGGING + compJitTelemetry.NotifyEndOfCompilation(); +#endif + #if defined(DEBUG) || MEASURE_INLINING ++Compiler::jitTotalMethodCompiled; Compiler::jitTotalNumLocals += lvaCount; @@ -3484,6 +4024,82 @@ void Compiler::ProcessShutdownWork(ICorStaticInfo* statInfo) { } +#ifdef _TARGET_AMD64_ +// Check if we need to add the Quirk for the PPP backward compat issue. +// This Quirk addresses a compatibility issue between the new RyuJit and the previous JIT64. +// A backward compatibity issue called 'PPP' exists where a PInvoke call passes a 32-byte struct +// into a native API which basically writes 48 bytes of data into the struct. +// With the stack frame layout used by the RyuJIT the extra 16 bytes written corrupts a +// caller saved register and this leads to an A/V in the calling method. +// The older JIT64 jit compiler just happened to have a different stack layout and/or +// caller saved register set so that it didn't hit the A/V in the caller. +// By increasing the amount of stack allocted for the struct by 32 bytes we can fix this. 
+// +// Return true if we actually perform the Quirk, otherwise return false +// +bool Compiler::compQuirkForPPP() +{ + if (lvaCount != 2) // We require that there are exactly two locals + return false; + + if (compTailCallUsed) // Don't try this quirk if a tail call was used + return false; + + bool hasOutArgs = false; + LclVarDsc * varDscExposedStruct = nullptr; + + unsigned lclNum; + LclVarDsc * varDsc; + + /* Look for struct locals that are address taken */ + for (lclNum = 0, varDsc = lvaTable; + lclNum < lvaCount; + lclNum++, varDsc++) + { + if (varDsc->lvIsParam) // It can't be a parameter + { + continue; + } + + // We require that the OutgoingArg space lclVar exists + if (lclNum == lvaOutgoingArgSpaceVar) + { + hasOutArgs = true; // Record that we saw it + continue; + } + + // Look for a 32-byte address exposed Struct and record its varDsc + if ((varDsc->TypeGet() == TYP_STRUCT) && + varDsc->lvAddrExposed && + (varDsc->lvExactSize == 32) ) + { + varDscExposedStruct = varDsc; + } + } + + // We only perform the Quirk when there are two locals + // one of them is a address exposed struct of size 32 + // and the other is the outgoing arg space local + // + if (hasOutArgs && (varDscExposedStruct != nullptr)) + { +#ifdef DEBUG + if (verbose) + { + printf("\nAdding a backwards compatibility quirk for the 'PPP' issue\n"); + } +#endif // DEBUG + + // Increase the exact size of this struct by 32 bytes + // This fixes the PPP backward compat issue + varDscExposedStruct->lvExactSize += 32; + + return true; + } + return false; +} +#endif // _TARGET_AMD64_ + /*****************************************************************************/ #ifdef DEBUG @@ -3552,6 +4168,28 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd, forceFrameJIT = (void*) &me; // let us see the this pointer in fastchecked build // set this early so we can use it without relying on random memory values verbose = compIsForInlining()?impInlineInfo->InlinerCompiler->verbose:false; + + this->dumpIR = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIR : false; + this->dumpIRPhase = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRPhase : NULL; + this->dumpIRFormat = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRFormat : NULL; + this->dumpIRTypes = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRTypes : false; + this->dumpIRLocals = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRLocals : false; + this->dumpIRRegs = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRRegs : false; + this->dumpIRSsa = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRSsa : false; + this->dumpIRValnums = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRValnums : false; + this->dumpIRCosts = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRCosts : false; + this->dumpIRFlags = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRFlags : false; + this->dumpIRKinds = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRKinds : false; + this->dumpIRNodes = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNodes: false; + this->dumpIRNoLists = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoLists : false; + this->dumpIRNoLeafs = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoLeafs : false; + this->dumpIRNoStmts = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoStmts : false; + this->dumpIRTrees = compIsForInlining() ? 
impInlineInfo->InlinerCompiler->dumpIRTrees : false; + this->dumpIRLinear = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRLinear : false; + this->dumpIRDataflow = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRDataflow : false; + this->dumpIRBlockHeaders = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRBlockHeaders : NULL; + this->dumpIRExit = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRExit : NULL; + info.compMethodHashPrivate = 0; #endif @@ -4103,6 +4741,22 @@ int Compiler::compCompileHelper (CORINFO_MODULE_HANDLE clas dumpILRange(info.compCode, info.compILCodeSize); } +#endif + + // Check for COMPLUS_AgressiveInlining + static ConfigDWORD fJitAggressiveInlining; + if (fJitAggressiveInlining.val(CLRConfig::INTERNAL_JitAggressiveInlining)) + { + compDoAggressiveInlining = true; + } + + if (compDoAggressiveInlining) + { + info.compFlags |= CORINFO_FLG_FORCEINLINE; + } + +#ifdef DEBUG + // Check for ForceInline stress. if (compStressCompile(STRESS_FORCE_INLINE, 0)) { @@ -4381,6 +5035,16 @@ _Next: { return CORJIT_SKIPPED; } + +#ifdef ALT_JIT +#ifdef DEBUG + static ConfigDWORD fRunAltJitCode; + if (fRunAltJitCode.val(CLRConfig::INTERNAL_RunAltJitCode) == 0) + { + return CORJIT_SKIPPED; + } +#endif // DEBUG +#endif // ALT_JIT } /* Success! */ @@ -5688,22 +6352,38 @@ void Compiler::compDispCallArgStats(FILE* fout) // Static variables CritSecObject CompTimeSummaryInfo::s_compTimeSummaryLock; CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary; +#endif // FEATURE_JIT_METHOD_PERF + +#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS +const char* PhaseNames[] = +{ +#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) string_nm, +#include "compphases.h" +}; + +const char* PhaseEnums[] = +{ +#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) #enum_nm, +#include "compphases.h" +}; -const char* PhaseNames[] = +const LPCWSTR PhaseShortNames[] = { -#define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent) string_nm, +#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) W(short_nm), #include "compphases.h" }; +#endif // defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS +#ifdef FEATURE_JIT_METHOD_PERF bool PhaseHasChildren[] = { -#define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent) hasChildren, +#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) hasChildren, #include "compphases.h" }; int PhaseParent[] = { -#define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent) parent, +#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) parent, #include "compphases.h" }; @@ -6243,6 +6923,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX * cVar, dVar : Display a local variable given its number (call lvaDumpEntry()). * cVarDsc, dVarDsc : Display a local variable given a LclVarDsc* (call lvaDumpEntry()). * cVars, dVars : Display the local variable table (call lvaTableDump()). + * cVarsFinal, dVarsFinal : Display the local variable table (call lvaTableDump(FINAL_FRAME_LAYOUT)). * cBlockCheapPreds, dBlockCheapPreds : Display a block's cheap predecessors (call block->dspCheapPreds()). * cBlockPreds, dBlockPreds : Display a block's predecessors (call block->dspPreds()). * cBlockSuccs, dBlockSuccs : Display a block's successors (call block->dspSuccs(compiler)). 
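The hunk above reworks the CompPhaseNameMacro X-macro (adding a short_nm column) so that a single list in compphases.h generates the parallel PhaseNames, PhaseEnums and PhaseShortNames tables as well as PhaseHasChildren and PhaseParent. Below is a minimal, self-contained sketch of that X-macro technique; the EXAMPLE_PHASES list, its two entries and all names in it are made up for illustration only — the JIT instead re-includes compphases.h under different definitions of CompPhaseNameMacro.

    // X-macro sketch: one list expands into an enum plus parallel name tables.
    #include <cstdio>

    #define EXAMPLE_PHASES(M)                                   \
        M(EXAMPLE_PHASE_PRE_IMPORT,  "Pre-import",  "PRE-IMP")  \
        M(EXAMPLE_PHASE_IMPORTATION, "Importation", "IMPORT")

    enum ExamplePhases
    {
    #define PhaseMacro(enum_nm, string_nm, short_nm) enum_nm,
        EXAMPLE_PHASES(PhaseMacro)
    #undef PhaseMacro
        EXAMPLE_PHASE_COUNT
    };

    static const char* ExamplePhaseNames[] =
    {
    #define PhaseMacro(enum_nm, string_nm, short_nm) string_nm,
        EXAMPLE_PHASES(PhaseMacro)
    #undef PhaseMacro
    };

    static const char* ExamplePhaseShortNames[] =
    {
    #define PhaseMacro(enum_nm, string_nm, short_nm) short_nm,
        EXAMPLE_PHASES(PhaseMacro)
    #undef PhaseMacro
    };

    int main()
    {
        // The enum values index the generated tables, so adding a phase to the
        // single list keeps the enum and every table in sync automatically.
        for (int i = 0; i < EXAMPLE_PHASE_COUNT; i++)
        {
            std::printf("%d: %s (%s)\n", i, ExamplePhaseNames[i], ExamplePhaseShortNames[i]);
        }
        return 0;
    }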
@@ -6252,6 +6933,26 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable indices. * These are converted to variable numbers and sorted. (Calls dumpConvertedVarSet()). * + * cFuncIR, dFuncIR : Display all the basic blocks of a function in linear IR form. + * cLoopIR, dLoopIR : Display a loop in linear IR form. + * dLoopNumIR : Display a loop (given number) in linear IR form. + * cBlockIR, dBlockIR : Display a basic block in linear IR form. + * cTreeIR, dTreeIR : Display a tree in linear IR form. + * dTabStopIR : Display spaces to the next tab stop column + * cTreeTypeIR dTreeTypeIR : Display tree type + * cTreeKindsIR dTreeKindsIR : Display tree kinds + * cTreeFlagsIR dTreeFlagsIR : Display tree flags + * cOperandIR dOperandIR : Display tree operand + * cLeafIR dLeafIR : Display tree leaf + * cIndirIR dIndirIR : Display indir tree as [t#] or [leaf] + * cListIR dListIR : Display tree list + * cSsaNumIR dSsaNumIR : Display SSA number as <u|d:#> + * cValNumIR dValNumIR : Display Value number as <v{l|c}:#{,R}> + * cDependsIR : Display dependencies of a tree DEP(t# ...) node + * based on child comma tree nodes + * dFormatIR : Display dump format specified on command line + * + * * The following don't require a Compiler* to work: * dVarSet : Display a VARSET_TP (call dumpVarSet()). * dRegMask : Display a regMaskTP (call dspRegMask(mask)). @@ -6321,6 +7022,13 @@ void cVars(Compiler* comp) comp->lvaTableDump(); } +void cVarsFinal(Compiler* comp) +{ + static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called + printf("===================================================================== *Vars %u\n", sequenceNumber++); + comp->lvaTableDump(Compiler::FINAL_FRAME_LAYOUT); +} + void cBlockCheapPreds(Compiler* comp, BasicBlock* block) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called @@ -6417,6 +7125,11 @@ void dVars() cVars(GetTlsCompiler()); } +void dVarsFinal() +{ + cVarsFinal(GetTlsCompiler()); +} + void dBlockPreds(BasicBlock* block) { cBlockPreds(GetTlsCompiler(), block); @@ -6472,6 +7185,2527 @@ dBlockList(BasicBlockList* list) } printf("\n"); } + +// Global variables available in debug mode. That are set by debug APIs for finding +// Trees, Stmts, and/or Blocks using id or bbNum. +// That can be used in watch window or as a way to get address of fields for data break points. + +GenTree* dbTree; +GenTreeStmt* dbStmt; +BasicBlock* dbTreeBlock; +BasicBlock* dbBlock; + +// Debug APIs for finding Trees, Stmts, and/or Blocks. +// As a side effect, they set the debug variables above. 
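The debug helpers introduced below (dFindTree, dFindStmt, dFindBlock) walk the flow graph looking for a node with a given id and, as a side effect, record the hit in the dbTree/dbStmt/dbTreeBlock/dbBlock globals so it can be inspected from a watch window or used to plant a data breakpoint, as the comment above notes. The following stripped-down sketch shows that "find by id and stash the result in a global for the debugger" pattern; the Node type, its fields and FindNodeById are hypothetical stand-ins, not the JIT's GenTree walker.

    // Sketch of the debugger-helper pattern: recursive search that also stores
    // the result in a global the debugger can watch afterwards.
    struct Node
    {
        unsigned id;
        Node*    child[2];
    };

    // Global the debugger can inspect (or set a data breakpoint through) after a call.
    Node* g_foundNode = nullptr;

    Node* FindNodeById(Node* root, unsigned id)
    {
        if (root == nullptr)
        {
            return nullptr;
        }
        if (root->id == id)
        {
            g_foundNode = root;   // side effect: remember the hit for the watch window
            return root;
        }
        for (Node* c : root->child)
        {
            Node* hit = FindNodeById(c, id);
            if (hit != nullptr)
            {
                return hit;
            }
        }
        return nullptr;
    }

In a debugger session one could evaluate, say, dFindTree(<id>) from the immediate window and then examine dbTree and dbTreeBlock through the globals, in the same way this sketch exposes g_foundNode.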
+ +GenTree* dFindTree(GenTree* tree, unsigned id) +{ + GenTree* child; + + if (tree == nullptr) + { + return nullptr; + } + + if (tree->gtTreeID == id) + { + dbTree = tree; + return tree; + } + + unsigned childCount = tree->NumChildren(); + for (unsigned childIndex = 0; childIndex < childCount; childIndex++) + { + child = tree->GetChild(childIndex); + child = dFindTree(child, id); + if (child != nullptr) + { + return child; + } + } + + return nullptr; +} + +GenTree* dFindTree(unsigned id) +{ + Compiler* comp = GetTlsCompiler(); + BasicBlock* block; + GenTree* tree; + + dbTreeBlock = nullptr; + dbTree = nullptr; + + for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext) + { + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) + { + tree = dFindTree(stmt, id); + if (tree != nullptr) + { + dbTreeBlock = block; + return tree; + } + } + } + + return nullptr; +} + +GenTreeStmt* dFindStmt(unsigned id) +{ + Compiler* comp = GetTlsCompiler(); + BasicBlock* block; + + dbStmt = nullptr; + + unsigned stmtId = 0; + for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext) + { + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) + { + stmtId++; + if (stmtId == id) + { + dbStmt = stmt; + return stmt; + } + } + } + + return nullptr; +} + +BasicBlock* dFindBlock(unsigned bbNum) +{ + Compiler* comp = GetTlsCompiler(); + BasicBlock* block = nullptr; + + dbBlock = nullptr; + for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext) + { + if (block->bbNum == bbNum) + { + dbBlock = block; + break; + } + } + + return block; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out function in linear IR form + */ + +void cFuncIR(Compiler* comp) +{ + BasicBlock* block; + + printf("Method %s::%s, hsh=0x%x\n", comp->info.compClassName, comp->info.compMethodName, + comp->info.compMethodHash()); + + printf("\n"); + + for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext) + { + cBlockIR(comp, block); + } +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out the format specifiers from COMPLUS_JitDumpIRFormat + */ + +void dFormatIR() +{ + Compiler* comp = GetTlsCompiler(); + + if (comp->dumpIRFormat != NULL) + { + printf("COMPLUS_JitDumpIRFormat=%ls", comp->dumpIRFormat); + } +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out function in linear IR form + */ + +void dFuncIR() +{ + cFuncIR(GetTlsCompiler()); +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out loop in linear IR form + */ + +void cLoopIR(Compiler* comp, Compiler::LoopDsc* loop) +{ + BasicBlock* blockHead = loop->lpHead; + BasicBlock* blockFirst = loop->lpFirst; + BasicBlock* blockTop = loop->lpTop; + BasicBlock* blockEntry = loop->lpEntry; + BasicBlock* blockBottom = loop->lpBottom; + BasicBlock* blockExit = loop->lpExit; + BasicBlock* blockLast = blockBottom->bbNext; + BasicBlock* block; + + printf("LOOP\n"); + printf("\n"); + printf("HEAD BB%02u\n", blockHead->bbNum); + printf("FIRST BB%02u\n", blockFirst->bbNum); + printf("TOP BB%02u\n", blockTop->bbNum); + printf("ENTRY BB%02u\n", blockEntry->bbNum); + if (loop->lpExitCnt == 1) + { + printf("EXIT BB%02u\n", blockExit->bbNum); + } + else + { + printf("EXITS %u", loop->lpExitCnt); + } + printf("BOTTOM 
BB%02u\n", blockBottom->bbNum); + printf("\n"); + + cBlockIR(comp, blockHead); + for (block = blockFirst; ((block != nullptr) && (block != blockLast)); block = block->bbNext) + { + cBlockIR(comp, block); + } +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out loop in linear IR form + */ + +void dLoopIR(Compiler::LoopDsc* loop) +{ + cLoopIR(GetTlsCompiler(), loop); +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out loop (given loop number) in linear IR form + */ + +void dLoopNumIR(unsigned loopNum) +{ + Compiler* comp = GetTlsCompiler(); + + if (loopNum >= comp->optLoopCount) + { + printf("loopNum %u out of range\n"); + return; + } + + Compiler::LoopDsc* loop = &comp->optLoopTable[loopNum]; + cLoopIR(GetTlsCompiler(), loop); +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump spaces to specified tab stop + */ + +int dTabStopIR(int curr, int tabstop) +{ + int chars = 0; + + if (tabstop <= curr) + chars += printf(" "); + + for (int i = curr; i < tabstop; i++) + chars += printf(" "); + + return chars; +} + +void cNodeIR(Compiler* comp, GenTree* tree); + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out block in linear IR form + */ + +void cBlockIR(Compiler* comp, BasicBlock* block) +{ + bool noStmts = comp->dumpIRNoStmts; + bool trees = comp->dumpIRTrees; + + if (comp->dumpIRBlockHeaders) + { + block->dspBlockHeader(comp); + } + else + { + printf("BB%02u:\n", block->bbNum); + } + + printf("\n"); + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) + { + // Skip embedded stmts. They should have already been dumped prior to the stmt + // that they are embedded into. Even though they appear on the stmt list + // after the stmt they are embedded into. Don't understand the rationale for that + // but make the dataflow view look consistent. + + if ((stmt->gtFlags & GTF_STMT_TOP_LEVEL) == 0) + { + continue; + } + + // Print current stmt. 
+ + if (trees) + { + cTree(comp, stmt); + printf("\n"); + printf("=====================================================================\n"); + } + + if (comp->compRationalIRForm) + { + GenTree* tree; + + foreach_treenode_execution_order(tree, stmt) + { + cNodeIR(comp, tree); + } + } + else + { + cTreeIR(comp, stmt); + } + + if (!noStmts && !trees) + { + printf("\n"); + } + } + + int chars = 0; + + chars += dTabStopIR(chars, COLUMN_OPCODE); + + chars += printf(" "); + switch (block->bbJumpKind) + { + case BBJ_EHFINALLYRET: + chars += printf("BRANCH(EHFINALLYRET)"); + break; + + case BBJ_EHFILTERRET: + chars += printf("BRANCH(EHFILTERRET)"); + break; + + case BBJ_EHCATCHRET: + chars += printf("BRANCH(EHCATCHRETURN)"); + chars += dTabStopIR(chars, COLUMN_OPERANDS); + chars += printf(" BB%02u",block->bbJumpDest->bbNum); + break; + + case BBJ_THROW: + chars += printf("BRANCH(THROW)"); + break; + + case BBJ_RETURN: + chars += printf("BRANCH(RETURN)"); + break; + + case BBJ_NONE: + // For fall-through blocks + chars += printf("BRANCH(NONE)"); + break; + + case BBJ_ALWAYS: + chars += printf("BRANCH(ALWAYS)"); + chars += dTabStopIR(chars, COLUMN_OPERANDS); + chars += printf(" BB%02u",block->bbJumpDest->bbNum); + if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS) + { + chars += dTabStopIR(chars, COLUMN_KINDS); + chars += printf("; [KEEP_BBJ_ALWAYS]"); + } + break; + + case BBJ_LEAVE: + chars += printf("BRANCH(LEAVE)"); + chars += dTabStopIR(chars, COLUMN_OPERANDS); + chars += printf(" BB%02u", block->bbJumpDest->bbNum); + break; + + case BBJ_CALLFINALLY: + chars += printf("BRANCH(CALLFINALLY)"); + chars += dTabStopIR(chars, COLUMN_OPERANDS); + chars += printf(" BB%02u", block->bbJumpDest->bbNum); + break; + + case BBJ_COND: + chars += printf("BRANCH(COND)"); + chars += dTabStopIR(chars, COLUMN_OPERANDS); + chars += printf(" BB%02u", block->bbJumpDest->bbNum); + break; + + case BBJ_SWITCH: + chars += printf("BRANCH(SWITCH)"); + chars += dTabStopIR(chars, COLUMN_OPERANDS); + + unsigned jumpCnt; + jumpCnt = block->bbJumpSwt->bbsCount; + BasicBlock** jumpTab; + jumpTab = block->bbJumpSwt->bbsDstTab; + do + { + chars += printf("%c BB%02u", + (jumpTab == block->bbJumpSwt->bbsDstTab) ? 
' ' : ',', + (*jumpTab)->bbNum); + } + while (++jumpTab, --jumpCnt); + break; + + default: + unreached(); + break; + } + + printf("\n"); + if (block->bbNext != NULL) + { + printf("\n"); + } +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out block in linear IR form + */ + +void dBlockIR(BasicBlock* block) +{ + cBlockIR(GetTlsCompiler(), block); +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree node type for linear IR form + */ + +int cTreeTypeIR(Compiler *comp, GenTree *tree) +{ + int chars = 0; + + var_types type = tree->TypeGet(); + + const char * typeName = varTypeName(type); + chars += printf(".%s", typeName); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree node type for linear IR form + */ + +int dTreeTypeIR(GenTree *tree) +{ + int chars = cTreeTypeIR(GetTlsCompiler(), tree); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree node kind for linear IR form + */ + +int cTreeKindsIR(Compiler *comp, GenTree *tree) +{ + int chars = 0; + + unsigned kind = tree->OperKind(); + + chars += printf("kinds="); + if (kind == GTK_SPECIAL) + chars += printf("[SPECIAL]"); + if (kind & GTK_CONST) + chars += printf("[CONST]"); + if (kind & GTK_LEAF) + chars += printf("[LEAF]"); + if (kind & GTK_UNOP) + chars += printf("[UNOP]"); + if (kind & GTK_BINOP) + chars += printf("[BINOP]"); + if (kind & GTK_LOGOP) + chars += printf("[LOGOP]"); + if (kind & GTK_ASGOP) + chars += printf("[ASGOP]"); + if (kind & GTK_COMMUTE) + chars += printf("[COMMUTE]"); + if (kind & GTK_EXOP) + chars += printf("[EXOP]"); + if (kind & GTK_LOCAL) + chars += printf("[LOCAL]"); + if (kind & GTK_SMPOP) + chars += printf("[SMPOP]"); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree node kind for linear IR form + */ + +int dTreeKindsIR(GenTree *tree) +{ + int chars = cTreeKindsIR(GetTlsCompiler(), tree); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree node flags for linear IR form + */ + +int cTreeFlagsIR(Compiler *comp, GenTree *tree) +{ + int chars = 0; + + if (tree->gtFlags != 0) + { + if (!comp->dumpIRNodes) + { + if ((tree->gtFlags & (~(GTF_NODE_LARGE|GTF_NODE_SMALL))) == 0) + { + return chars; + } + } + + chars += printf("flags="); + + // Node flags + +#if defined(DEBUG) && SMALL_TREE_NODES + if (comp->dumpIRNodes) + { + if (tree->gtFlags & GTF_NODE_LARGE) + { + chars += printf("[NODE_LARGE]"); + } + if (tree->gtFlags & GTF_NODE_SMALL) + { + chars += printf("[NODE_SMALL]"); + } + } +#endif + if (tree->gtFlags & GTF_MORPHED) + { + chars += printf("[MORPHED]"); + } + if (tree->gtFlags & GTF_COLON_COND) + { + chars += printf("[COLON_COND]"); + } + + // Operator flags + + genTreeOps op = tree->OperGet(); + switch (op) + { + + case GT_LCL_VAR: + case GT_LCL_VAR_ADDR: + case GT_LCL_FLD: + case GT_LCL_FLD_ADDR: + case GT_STORE_LCL_FLD: + case GT_STORE_LCL_VAR: + case GT_REG_VAR: + + if (tree->gtFlags & GTF_VAR_DEF) + { + chars += printf("[VAR_DEF]"); + } + if (tree->gtFlags & GTF_VAR_USEASG) + { + chars += printf("[VAR_USEASG]"); + } + if (tree->gtFlags & GTF_VAR_USEDEF) 
+ { + chars += printf("[VAR_USEDEF]"); + } + if (tree->gtFlags & GTF_VAR_CAST) + { + chars += printf("[VAR_CAST]"); + } + if (tree->gtFlags & GTF_VAR_ITERATOR) + { + chars += printf("[VAR_ITERATOR]"); + } + if (tree->gtFlags & GTF_VAR_CLONED) + { + chars += printf("[VAR_CLONED]"); + } + if (tree->gtFlags & GTF_VAR_DEATH) + { + chars += printf("[VAR_DEATH]"); + } + if (tree->gtFlags & GTF_VAR_ARR_INDEX) + { + chars += printf("[VAR_ARR_INDEX]"); + } + if (tree->gtFlags & GTFD_VAR_CSE_REF) + { + chars += printf("[VAR_CSE_REF]"); + } + if (op == GT_REG_VAR) + { + if (tree->gtFlags & GTF_REG_BIRTH) + { + chars += printf("[REG_BIRTH]"); + } + } + break; + + case GT_NOP: + + if (tree->gtFlags & GTF_NOP_DEATH) + { + chars += printf("[NOP_DEATH]"); + } + break; + + case GT_NO_OP: + + if (tree->gtFlags & GTF_NO_OP_NO) + { + chars += printf("[NO_OP_NO]"); + } + break; + + case GT_FIELD: + + if (tree->gtFlags & GTF_FLD_NULLCHECK) + { + chars += printf("[FLD_NULLCHECK]"); + } + if (tree->gtFlags & GTF_FLD_VOLATILE) + { + chars += printf("[FLD_VOLATILE]"); + } + break; + + case GT_INDEX: + + if (tree->gtFlags & GTF_INX_RNGCHK) + { + chars += printf("[INX_RNGCHK]"); + } + if (tree->gtFlags & GTF_INX_REFARR_LAYOUT) + { + chars += printf("[INX_REFARR_LAYOUT]"); + } + if (tree->gtFlags & GTF_INX_STRING_LAYOUT) + { + chars += printf("[INX_STRING_LAYOUT]"); + } + break; + + case GT_IND: + case GT_STOREIND: + + if (tree->gtFlags & GTF_IND_VOLATILE) + { + chars += printf("[IND_VOLATILE]"); + } + if (tree->gtFlags & GTF_IND_REFARR_LAYOUT) + { + chars += printf("[IND_REFARR_LAYOUT]"); + } + if (tree->gtFlags & GTF_IND_TGTANYWHERE) + { + chars += printf("[IND_TGTANYWHERE]"); + } + if (tree->gtFlags & GTF_IND_TLS_REF) + { + chars += printf("[IND_TLS_REF]"); + } + if (tree->gtFlags & GTF_IND_ASG_LHS) + { + chars += printf("[IND_ASG_LHS]"); + } + if (tree->gtFlags & GTF_IND_UNALIGNED) + { + chars += printf("[IND_UNALIGNED]"); + } + if (tree->gtFlags & GTF_IND_INVARIANT) + { + chars += printf("[IND_INVARIANT]"); + } + if (tree->gtFlags & GTF_IND_ARR_LEN) + { + chars += printf("[IND_ARR_INDEX]"); + } + break; + + case GT_CLS_VAR: + + if (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) + { + chars += printf("[CLS_VAR_ASG_LHS]"); + } + break; + + case GT_ADDR: + + if (tree->gtFlags & GTF_ADDR_ONSTACK) + { + chars += printf("[ADDR_ONSTACK]"); + } + break; + + case GT_MUL: + + if (tree->gtFlags & GTF_MUL_64RSLT) + { + chars += printf("[64RSLT]"); + } + if (tree->gtFlags & GTF_ADDRMODE_NO_CSE) + { + chars += printf("[ADDRMODE_NO_CSE]"); + } + break; + + case GT_ADD: + + if (tree->gtFlags & GTF_ADDRMODE_NO_CSE) + { + chars += printf("[ADDRMODE_NO_CSE]"); + } + break; + + case GT_LSH: + + if (tree->gtFlags & GTF_ADDRMODE_NO_CSE) + { + chars += printf("[ADDRMODE_NO_CSE]"); + } + break; + + case GT_MOD: + case GT_UMOD: + + if (tree->gtFlags & GTF_MOD_INT_RESULT) + { + chars += printf("[MOD_INT_RESULT]"); + } + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GT: + case GT_GE: + + if (tree->gtFlags & GTF_RELOP_NAN_UN) + { + chars += printf("[RELOP_NAN_UN]"); + } + if (tree->gtFlags & GTF_RELOP_JMP_USED) + { + chars += printf("[RELOP_JMP_USED]"); + } + if (tree->gtFlags & GTF_RELOP_QMARK) + { + chars += printf("[RELOP_QMARK]"); + } + if (tree->gtFlags & GTF_RELOP_SMALL) + { + chars += printf("[RELOP_SMALL]"); + } + break; + + case GT_QMARK: + + if (tree->gtFlags & GTF_QMARK_CAST_INSTOF) + { + chars += printf("[QMARK_CAST_INSTOF]"); + } + break; + + case GT_BOX: + + if (tree->gtFlags & GTF_BOX_VALUE) + { + chars += 
printf("[BOX_VALUE]"); + } + break; + + case GT_CNS_INT: + + { + unsigned handleKind = (tree->gtFlags & GTF_ICON_HDL_MASK); + + switch (handleKind) + { + + case GTF_ICON_SCOPE_HDL: + + chars += printf("[ICON_SCOPE_HDL]"); + break; + + case GTF_ICON_CLASS_HDL: + + chars += printf("[ICON_CLASS_HDL]"); + break; + + case GTF_ICON_METHOD_HDL: + + chars += printf("[ICON_METHOD_HDL]"); + break; + + case GTF_ICON_FIELD_HDL: + + chars += printf("[ICON_FIELD_HDL]"); + break; + + case GTF_ICON_STATIC_HDL: + + chars += printf("[ICON_STATIC_HDL]"); + break; + + case GTF_ICON_STR_HDL: + + chars += printf("[ICON_STR_HDL]"); + break; + + case GTF_ICON_PSTR_HDL: + + chars += printf("[ICON_PSTR_HDL]"); + break; + + case GTF_ICON_PTR_HDL: + + chars += printf("[ICON_PTR_HDL]"); + break; + + case GTF_ICON_VARG_HDL: + + chars += printf("[ICON_VARG_HDL]"); + break; + + case GTF_ICON_PINVKI_HDL: + + chars += printf("[ICON_PINVKI_HDL]"); + break; + + case GTF_ICON_TOKEN_HDL: + + chars += printf("[ICON_TOKEN_HDL]"); + break; + + case GTF_ICON_TLS_HDL: + + chars += printf("[ICON_TLD_HDL]"); + break; + + case GTF_ICON_FTN_ADDR: + + chars += printf("[ICON_FTN_ADDR]"); + break; + + case GTF_ICON_CIDMID_HDL: + + chars += printf("[ICON_CIDMID_HDL]"); + break; + + case GTF_ICON_BBC_PTR: + + chars += printf("[ICON_BBC_PTR]"); + break; + + case GTF_ICON_FIELD_OFF: + + chars += printf("[ICON_FIELD_OFF]"); + break; + } + } + break; + + case GT_COPYBLK: + case GT_INITBLK: + case GT_COPYOBJ: + + if (tree->gtFlags & GTF_BLK_HASGCPTR) + { + chars += printf("[BLK_HASGCPTR]"); + } + if (tree->gtFlags & GTF_BLK_VOLATILE) + { + chars += printf("[BLK_VOLATILE]"); + } + if (tree->gtFlags & GTF_BLK_UNALIGNED) + { + chars += printf("[BLK_UNALIGNED]"); + } + break; + + case GT_CALL: + + if (tree->gtFlags & GTF_CALL_UNMANAGED) + { + chars += printf("[CALL_UNMANAGED]"); + } + if (tree->gtFlags & GTF_CALL_INLINE_CANDIDATE) + { + chars += printf("[CALL_INLINE_CANDIDATE]"); + } + if (tree->gtFlags & GTF_CALL_NONVIRT) + { + chars += printf("[CALL_NONVIRT]"); + } + if (tree->gtFlags & GTF_CALL_VIRT_VTABLE) + { + chars += printf("[CALL_VIRT_VTABLE]"); + } + if (tree->gtFlags & GTF_CALL_VIRT_STUB) + { + chars += printf("[CALL_VIRT_STUB]"); + } + if (tree->gtFlags & GTF_CALL_NULLCHECK) + { + chars += printf("[CALL_NULLCHECK]"); + } + if (tree->gtFlags & GTF_CALL_POP_ARGS) + { + chars += printf("[CALL_POP_ARGS]"); + } + if (tree->gtFlags & GTF_CALL_HOISTABLE) + { + chars += printf("[CALL_HOISTABLE]"); + } + if (tree->gtFlags & GTF_CALL_REG_SAVE) + { + chars += printf("[CALL_REG_SAVE]"); + } + + // More flags associated with calls. 
+ + { + GenTreeCall* call = tree->AsCall(); + + if (call->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL) + { + chars += printf("[CALL_M_EXPLICIT_TAILCALL]"); + } + if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL) + { + chars += printf("[CALL_M_TAILCALL]"); + } + if (call->gtCallMoreFlags & GTF_CALL_M_VARARGS) + { + chars += printf("[CALL_M_VARARGS]"); + } + if (call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) + { + chars += printf("[CALL_M_RETBUFFARG]"); + } + if (call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) + { + chars += printf("[CALL_M_DELEGATE_INV]"); + } + if (call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) + { + chars += printf("[CALL_M_NOGCCHECK]"); + } + if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) + { + chars += printf("[CALL_M_SPECIAL_INTRINSIC]"); + } + + if (call->IsUnmanaged()) + { + if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL) + { + chars += printf("[CALL_M_UNMGD_THISCALL]"); + } + } + else if (call->IsVirtualStub()) + { + if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) + { + chars += printf("[CALL_M_VIRTSTUB_REL_INDIRECT]"); + } + } + else if (!call->IsVirtual()) + { + if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS) + { + chars += printf("[CALL_M_NONVIRT_SAME_THIS]"); + } + } + + if (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH) + { + chars += printf("[CALL_M_FRAME_VAR_DEATH]"); + } +#ifndef LEGACY_BACKEND + if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_HELPER) + { + chars += printf("[CALL_M_TAILCALL_VIA_HELPER]"); + } +#endif +#if FEATURE_TAILCALL_OPT + if (call->gtCallMoreFlags & GTF_CALL_M_IMPLICIT_TAILCALL) + { + chars += printf("[CALL_M_IMPLICIT_TAILCALL]"); + } +#endif + if (call->gtCallMoreFlags & GTF_CALL_M_PINVOKE) + { + chars += printf("[CALL_M_PINVOKE]"); + } + } + break; + + case GT_STMT: + + if (tree->gtFlags & GTF_STMT_CMPADD) + { + chars += printf("[STMT_CMPADD]"); + } + if (tree->gtFlags & GTF_STMT_HAS_CSE) + { + chars += printf("[STMT_HAS_CSE]"); + } + if (tree->gtFlags & GTF_STMT_TOP_LEVEL) + { + chars += printf("[STMT_TOP_LEVEL]"); + } + if (tree->gtFlags & GTF_STMT_SKIP_LOWER) + { + chars += printf("[STMT_SKIP_LOWER]"); + } + break; + + default: + + { + unsigned flags = (tree->gtFlags & (~(unsigned)(GTF_COMMON_MASK|GTF_OVERFLOW))); + if (flags != 0) + chars += printf("[%08X]", flags); + } + break; + } + + // Common flags. 
+ + if (tree->gtFlags & GTF_ASG) + { + chars += printf("[ASG]"); + } + if (tree->gtFlags & GTF_CALL) + { + chars += printf("[CALL]"); + } + switch (op) + { + case GT_MUL: + case GT_CAST: + case GT_ADD: + case GT_SUB: + case GT_ASG_ADD: + case GT_ASG_SUB: + if (tree->gtFlags & GTF_OVERFLOW) + { + chars += printf("[OVERFLOW]"); + } + break; + default: + break; + } + if (tree->gtFlags & GTF_EXCEPT) + { + chars += printf("[EXCEPT]"); + } + if (tree->gtFlags & GTF_GLOB_REF) + { + chars += printf("[GLOB_REF]"); + } + if (tree->gtFlags & GTF_ORDER_SIDEEFF) + { + chars += printf("[ORDER_SIDEEFF]"); + } + if (tree->gtFlags & GTF_REVERSE_OPS) + { + if (op != GT_LCL_VAR) + { + chars += printf("[REVERSE_OPS]"); + } + } + if (tree->gtFlags & GTF_REG_VAL) + { + chars += printf("[REG_VAL]"); + } + if (tree->gtFlags & GTF_SPILLED) + { + chars += printf("[SPILLED_OPER]"); + } +#if defined(LEGACY_BACKEND) + if (tree->gtFlags & GTF_SPILLED_OP2) + { + chars += printf("[SPILLED_OP2]"); + } +#endif + if (tree->gtFlags & GTF_REDINDEX_CHECK) + { + chars += printf("[REDINDEX_CHECK]"); + } + if (tree->gtFlags & GTF_REDINDEX_CHECK) + { + chars += printf("[REDINDEX_CHECK]"); + } + if (tree->gtFlags & GTF_ZSF_SET) + { + chars += printf("[ZSF_SET]"); + } +#if FEATURE_SET_FLAGS + if (tree->gtFlags & GTF_SET_FLAGS) + { + if ((op != GT_IND) && (op != GT_STOREIND)) + { + chars += printf("[ZSF_SET_FLAGS]"); + } + } +#endif + if (tree->gtFlags & GTF_IND_NONFAULTING) + { + if ((op == GT_IND) || (op == GT_STOREIND)) + { + chars += printf("[IND_NONFAULTING]"); + } + } +#if FEATURE_ANYCSE + if (tree->gtFlags & GTF_DEAD) + { + chars += printf("[DEAD]"); + } +#endif + if (tree->gtFlags & GTF_MAKE_CSE) + { + chars += printf("[MAKE_CSE]"); + } + if (tree->gtFlags & GTF_DONT_CSE) + { + chars += printf("[DONT_CSE]"); + } + if (tree->gtFlags & GTF_BOOLEAN) + { + chars += printf("[BOOLEAN]"); + } + if (tree->gtFlags & GTF_SMALL_OK) + { + chars += printf("[SMALL_OK]"); + } + if (tree->gtFlags & GTF_UNSIGNED) + { + chars += printf("[SMALL_UNSIGNED]"); + } + if (tree->gtFlags & GTF_LATE_ARG) + { + chars += printf("[SMALL_LATE_ARG]"); + } + if (tree->gtFlags & GTF_SPILL) + { + chars += printf("[SPILL]"); + } + if (tree->gtFlags & GTF_SPILL_HIGH) + { + chars += printf("[SPILL_HIGH]"); + } + if (tree->gtFlags & GTF_REUSE_REG_VAL) + { + if (op == GT_CNS_INT) + { + chars += printf("[REUSE_REG_VAL]"); + } + } + } + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree node flags for linear IR form + */ + +int dTreeFlagsIR(GenTree *tree) +{ + int chars = cTreeFlagsIR(GetTlsCompiler(), tree); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out SSA number on tree node for linear IR form + */ + +int cSsaNumIR(Compiler *comp, GenTree *tree) +{ + int chars = 0; + + if (tree->gtLclVarCommon.HasSsaName()) + { + if (tree->gtFlags & GTF_VAR_USEASG) + { + assert(tree->gtFlags & GTF_VAR_DEF); + chars += printf("<u:%d><d:%d>", tree->gtLclVarCommon.gtSsaNum, + comp->GetSsaNumForLocalVarDef(tree)); + } + else + { + chars += printf("<%s:%d>", (tree->gtFlags & GTF_VAR_DEF) ? 
"d" : "u", + tree->gtLclVarCommon.gtSsaNum); + } + } + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out SSA number on tree node for linear IR form + */ + +int dSsaNumIR(GenTree *tree) +{ + int chars = cSsaNumIR(GetTlsCompiler(), tree); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out Value Number on tree node for linear IR form + */ + +int cValNumIR(Compiler *comp, GenTree *tree) +{ + int chars = 0; + + if (tree->gtVNPair.GetLiberal() != ValueNumStore::NoVN) + { + assert(tree->gtVNPair.GetConservative() != ValueNumStore::NoVN); + ValueNumPair vnp = tree->gtVNPair; + ValueNum vn; + if (vnp.BothEqual()) + { + chars += printf("<v:"); + vn = vnp.GetLiberal(); + chars += printf(STR_VN "%x", vn); + if (ValueNumStore::isReservedVN(vn)) + { + chars += printf("R"); + } + chars += printf(">"); + } + else + { + vn = vnp.GetLiberal(); + chars += printf("<v:"); + chars += printf(STR_VN "%x", vn); + if (ValueNumStore::isReservedVN(vn)) + { + chars += printf("R"); + } + chars += printf(","); + vn = vnp.GetConservative(); + chars += printf(STR_VN "%x", vn); + if (ValueNumStore::isReservedVN(vn)) + { + chars += printf("R"); + } + chars += printf(">"); + } + } + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out Value Number on tree node for linear IR form + */ + +int dValNumIR(GenTree *tree) +{ + int chars = cValNumIR(GetTlsCompiler(), tree); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree leaf node for linear IR form + */ + +int cLeafIR(Compiler *comp, GenTree* tree) +{ + int chars = 0; + genTreeOps op = tree->OperGet(); + const char* ilKind = nullptr; + const char* ilName = nullptr; + unsigned ilNum = 0; + unsigned lclNum = 0; + bool hasSsa = false; + + switch (op) + { + + case GT_PHI_ARG: + case GT_LCL_VAR: + case GT_LCL_VAR_ADDR: + case GT_STORE_LCL_VAR: + case GT_REG_VAR: + + lclNum = tree->gtLclVarCommon.gtLclNum; + comp->gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum); + if (ilName != nullptr) + { + chars += printf("%s", ilName); + } + else + { + LclVarDsc * varDsc = comp->lvaTable + lclNum; + chars += printf("%s%d", ilKind, ilNum); + if (comp->dumpIRLocals) + { + chars += printf("(V%02u", lclNum); + if (varDsc->lvTracked) + { + chars += printf(":T%02u", varDsc->lvVarIndex); + } + if (comp->dumpIRRegs) + { + if (varDsc->lvRegister) + { + if (isRegPairType(varDsc->TypeGet())) + { + chars += printf(":%s:%s", + getRegName(varDsc->lvOtherReg), // hi32 + getRegName(varDsc->lvRegNum)); // lo32 + } + else + { + chars += printf(":%s", getRegName(varDsc->lvRegNum)); + } + } + else + { + switch (tree->GetRegTag()) + { + case GenTree::GT_REGTAG_REG: + chars += printf(":%s", comp->compRegVarName(tree->gtRegNum)); + break; +#if CPU_LONG_USES_REGPAIR + case GenTree::GT_REGTAG_REGPAIR: + chars += printf(":%s", comp->compRegPairName(tree->gtRegPair)); + break; +#endif + default: + break; + } + } + } + chars += printf(")"); + } + else if (comp->dumpIRRegs) + { + if (varDsc->lvRegister) + { + chars += printf("("); + if (isRegPairType(varDsc->TypeGet())) + { + chars += printf("%s:%s", + getRegName(varDsc->lvOtherReg), // hi32 + getRegName(varDsc->lvRegNum)); // lo32 + } + else + { + chars += printf("%s", 
getRegName(varDsc->lvRegNum)); + } + chars += printf(")"); + } + else + { + switch (tree->GetRegTag()) + { + case GenTree::GT_REGTAG_REG: + chars += printf("(%s)", comp->compRegVarName(tree->gtRegNum)); + break; +#if CPU_LONG_USES_REGPAIR + case GenTree::GT_REGTAG_REGPAIR: + chars += printf("(%s)", comp->compRegPairName(tree->gtRegPair)); + break; +#endif + default: + break; + } + } + } + } + + if (op == GT_REG_VAR) + { + if (isFloatRegType(tree->gtType)) + { + assert(tree->gtRegVar.gtRegNum == tree->gtRegNum); + chars += printf("(FPV%u)", tree->gtRegNum); + } + else + { + chars += printf("(%s)", comp->compRegVarName(tree->gtRegVar.gtRegNum)); + } + } + + hasSsa = true; + break; + + case GT_LCL_FLD: + case GT_LCL_FLD_ADDR: + case GT_STORE_LCL_FLD: + + lclNum = tree->gtLclVarCommon.gtLclNum; + comp->gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum); + if (ilName != nullptr) + { + chars += printf("%s+%u", ilName, tree->gtLclFld.gtLclOffs); + } + else + { + chars += printf("%s%d+%u", ilKind, ilNum, tree->gtLclFld.gtLclOffs); + LclVarDsc * varDsc = comp->lvaTable + lclNum; + if (comp->dumpIRLocals) + { + chars += printf("(V%02u", lclNum); + if (varDsc->lvTracked) + { + chars += printf(":T%02u", varDsc->lvVarIndex); + } + if (comp->dumpIRRegs) + { + if (varDsc->lvRegister) + { + if (isRegPairType(varDsc->TypeGet())) + { + chars += printf(":%s:%s", + getRegName(varDsc->lvOtherReg), // hi32 + getRegName(varDsc->lvRegNum)); // lo32 + } + else + { + chars += printf(":%s", getRegName(varDsc->lvRegNum)); + } + } + else + { + switch (tree->GetRegTag()) + { + case GenTree::GT_REGTAG_REG: + chars += printf(":%s", comp->compRegVarName(tree->gtRegNum)); + break; +#if CPU_LONG_USES_REGPAIR + case GenTree::GT_REGTAG_REGPAIR: + chars += printf(":%s", comp->compRegPairName(tree->gtRegPair)); + break; +#endif + default: + break; + } + } + } + chars += printf(")"); + } + else if (comp->dumpIRRegs) + { + if (varDsc->lvRegister) + { + chars += printf("("); + if (isRegPairType(varDsc->TypeGet())) + { + chars += printf("%s:%s", + getRegName(varDsc->lvOtherReg), // hi32 + getRegName(varDsc->lvRegNum)); // lo32 + } + else + { + chars += printf("%s", getRegName(varDsc->lvRegNum)); + } + chars += printf(")"); + } + else + { + switch (tree->GetRegTag()) + { + case GenTree::GT_REGTAG_REG: + chars += printf("(%s)", comp->compRegVarName(tree->gtRegNum)); + break; +#if CPU_LONG_USES_REGPAIR + case GenTree::GT_REGTAG_REGPAIR: + chars += printf("(%s)", comp->compRegPairName(tree->gtRegPair)); + break; +#endif + default: + break; + } + } + } + } + + // TODO: We probably want to expand field sequence. + // gtDispFieldSeq(tree->gtLclFld.gtFieldSeq); + + hasSsa = true; + break; + + case GT_CNS_INT: + + if (tree->IsIconHandle()) + { +#if 0 + // TODO: Commented out because sometimes the CLR throws + // and exception when asking the names of some handles. + // Need to investigate. 
+ + const char* className; + const char* fieldName; + const char* methodName; + const wchar_t* str; + + switch (tree->GetIconHandleFlag()) + { + + case GTF_ICON_SCOPE_HDL: + + chars += printf("SCOPE(?)"); + break; + + case GTF_ICON_CLASS_HDL: + + className = comp->eeGetClassName((CORINFO_CLASS_HANDLE)tree->gtIntCon.gtIconVal); + chars += printf("CLASS(%s)", className); + break; + + case GTF_ICON_METHOD_HDL: + + methodName = comp->eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtIntCon.gtIconVal, + &className); + chars += printf("METHOD(%s.%s)", className, methodName); + break; + + case GTF_ICON_FIELD_HDL: + + fieldName = comp->eeGetFieldName((CORINFO_FIELD_HANDLE)tree->gtIntCon.gtIconVal, + &className); + chars += printf("FIELD(%s.%s) ", className, fieldName); + break; + + case GTF_ICON_STATIC_HDL: + + fieldName = comp->eeGetFieldName((CORINFO_FIELD_HANDLE)tree->gtIntCon.gtIconVal, + &className); + chars += printf("STATIC_FIELD(%s.%s)", className, fieldName); + break; + + case GTF_ICON_STR_HDL: + + str = comp->eeGetCPString(tree->gtIntCon.gtIconVal); + chars += printf("\"%S\"", str); + break; + + case GTF_ICON_PSTR_HDL: + + chars += printf("PSTR(?)"); + break; + + case GTF_ICON_PTR_HDL: + + chars += printf("PTR(?)"); + break; + + case GTF_ICON_VARG_HDL: + + chars += printf("VARARG(?)"); + break; + + case GTF_ICON_PINVKI_HDL: + + chars += printf("PINVOKE(?)"); + break; + + case GTF_ICON_TOKEN_HDL: + + chars += printf("TOKEN(%08X)", tree->gtIntCon.gtIconVal); + break; + + case GTF_ICON_TLS_HDL: + + chars += printf("TLS(?)"); + break; + + case GTF_ICON_FTN_ADDR: + + chars += printf("FTN(?)"); + break; + + case GTF_ICON_CIDMID_HDL: + + chars += printf("CIDMID(?)"); + break; + + case GTF_ICON_BBC_PTR: + + chars += printf("BBC(?)"); + break; + + default: + + chars += printf("HANDLE(?)"); + break; + } +#else +#ifdef _TARGET_64BIT_ + if ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0) + { + chars += printf("HANDLE(0x%llx)", dspPtr(tree->gtIntCon.gtIconVal)); + } + else +#endif + { + chars += printf("HANDLE(0x%0x)", dspPtr(tree->gtIntCon.gtIconVal)); + } +#endif + } + else + { + if (tree->TypeGet() == TYP_REF) + { + assert(tree->gtIntCon.gtIconVal == 0); + chars += printf("null"); + } +#ifdef _TARGET_64BIT_ + else if ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0) + { + chars += printf("0x%llx", tree->gtIntCon.gtIconVal); + } + else +#endif + { + chars += printf("%ld(0x%x)", tree->gtIntCon.gtIconVal, tree->gtIntCon.gtIconVal); + } + } + break; + + case GT_CNS_LNG: + + chars += printf("CONST(LONG)"); + break; + + case GT_CNS_DBL: + + chars += printf("CONST(DOUBLE)"); + break; + + case GT_CNS_STR: + + chars += printf("CONST(STR)"); + break; + + case GT_JMP: + + { + const char * methodName; + const char * className; + + methodName = comp->eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtVal.gtVal1, &className); + chars += printf(" %s.%s", className, methodName); + } + break; + + case GT_NO_OP: + case GT_START_NONGC: + case GT_PROF_HOOK: + case GT_CATCH_ARG: + case GT_MEMORYBARRIER: + case GT_ARGPLACE: + case GT_PINVOKE_PROLOG: +#ifndef LEGACY_BACKEND + case GT_JMPTABLE: +#endif + // Do nothing. 
+ break; + + case GT_RET_EXPR: + + chars += printf("t%d", tree->gtRetExpr.gtInlineCandidate->gtTreeID); + break; + + case GT_PHYSREG: + + chars += printf("%s", getRegName(tree->gtPhysReg.gtSrcReg, varTypeIsFloating(tree))); + break; + + case GT_LABEL: + + if (tree->gtLabel.gtLabBB) + chars += printf("BB%02u", tree->gtLabel.gtLabBB->bbNum); + else + chars += printf("BB?"); + break; + + case GT_CLS_VAR: + case GT_CLS_VAR_ADDR: + default: + + if (tree->OperIsLeaf()) + { + chars += printf("<leaf nyi: %s>", tree->OpName(tree->OperGet())); + } + + chars += printf("t%d", tree->gtTreeID); + break; + } + + if (comp->dumpIRTypes) + { + chars += cTreeTypeIR(comp, tree); + } + if (comp->dumpIRValnums) + { + chars += cValNumIR(comp, tree); + } + if (hasSsa && comp->dumpIRSsa) + { + chars += cSsaNumIR(comp, tree); + } + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree leaf node for linear IR form + */ + +int dLeafIR(GenTree* tree) +{ + int chars = cLeafIR(GetTlsCompiler(), tree); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree indir node for linear IR form + */ + +int cIndirIR(Compiler *comp, GenTree* tree) +{ + assert(tree->gtOper == GT_IND); + + int chars = 0; + GenTree* child; + + chars += printf("["); + child = tree->GetChild(0); + chars += cLeafIR(comp, child); + chars += printf("]"); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree indir node for linear IR form + */ + +int dIndirIR(GenTree* tree) +{ + int chars = cIndirIR(GetTlsCompiler(), tree); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree operand node for linear IR form + */ + +int cOperandIR(Compiler* comp, GenTree* operand) +{ + int chars = 0; + + if (operand == NULL) + { + chars += printf("t?"); + return chars; + } + + bool dumpTypes = comp->dumpIRTypes; + bool dumpValnums = comp->dumpIRValnums; + bool foldIndirs = comp->dumpIRDataflow; + bool foldLeafs = comp->dumpIRNoLeafs; + bool foldCommas = comp->dumpIRDataflow; + bool dumpDataflow = comp->dumpIRDataflow; + bool foldLists = comp->dumpIRNoLists; + bool dumpRegs = comp->dumpIRRegs; + + genTreeOps op = operand->OperGet(); + + if (foldLeafs && operand->OperIsLeaf()) + { + if ((op == GT_ARGPLACE) && foldLists) + { + return chars; + } + chars += cLeafIR(comp, operand); + } + else if (dumpDataflow && + (operand->OperIsAssignment() || (op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD))) + { + operand = operand->GetChild(0); + chars += cOperandIR(comp, operand); + } + else if ((op == GT_INDEX) && foldIndirs) + { + chars += printf("[t%d]", operand->gtTreeID); + if (dumpTypes) + { + chars += cTreeTypeIR(comp, operand); + } + if (dumpValnums) + { + chars += cValNumIR(comp, operand); + } + } + else if ((op == GT_IND) && foldIndirs) + { + chars += cIndirIR(comp, operand); + if (dumpTypes) + { + chars += cTreeTypeIR(comp, operand); + } + if (dumpValnums) + { + chars += cValNumIR(comp, operand); + } + } + else if ((op == GT_COMMA) && foldCommas) + { + operand = operand->GetChild(1); + chars += cOperandIR(comp, operand); + } + else if ((op == GT_LIST) && foldLists) + { + GenTree *list = operand; + unsigned childCount = list->NumChildren(); + + operand = list->GetChild(0); + int operandChars 
= cOperandIR(comp, operand); + chars += operandChars; + if (childCount > 1) + { + if (operandChars > 0) + chars += printf(", "); + operand = list->GetChild(1); + if (operand->gtOper == GT_LIST) + { + chars += cListIR(comp, operand); + } + else + { + chars += cOperandIR(comp, operand); + } + } + } + else + { + chars += printf("t%d", operand->gtTreeID); + if (dumpRegs) + { + regNumber regNum = operand->GetReg(); + if (regNum != REG_NA) + { + chars += printf("(%s)", getRegName(regNum)); + } + } + if (dumpTypes) + { + chars += cTreeTypeIR(comp, operand); + } + if (dumpValnums) + { + chars += cValNumIR(comp, operand); + } + } + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree operand node for linear IR form + */ + +int dOperandIR(GenTree* operand) +{ + int chars = cOperandIR(GetTlsCompiler(), operand); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree list of nodes for linear IR form + */ + +int cListIR(Compiler* comp, GenTree* list) +{ + int chars = 0; + int operandChars; + + assert(list->gtOper == GT_LIST); + + GenTree* child; + unsigned childCount; + + childCount = list->NumChildren(); + assert(childCount == 1 || childCount == 2); + + operandChars = 0; + for (unsigned childIndex = 0; childIndex < childCount; childIndex++) + { + if ((childIndex > 0) && (operandChars > 0)) + chars += printf(", "); + + child = list->GetChild(childIndex); + operandChars = cOperandIR(comp, child); + chars += operandChars; + } + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree list of nodes for linear IR form + */ + +int dListIR(GenTree* list) +{ + int chars = cListIR(GetTlsCompiler(), list); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree dependencies based on comma nodes for linear IR form + */ + +int cDependsIR(Compiler* comp, GenTree* comma, bool *first) +{ + int chars = 0; + + assert(comma->gtOper == GT_COMMA); + + GenTree* child; + + child = comma->GetChild(0); + if (child->gtOper == GT_COMMA) + { + chars += cDependsIR(comp, child, first); + } + else + { + if (!(*first)) + chars += printf(", "); + chars += printf("t%d", child->gtTreeID); + *first = false; + } + + child = comma->GetChild(1); + if (child->gtOper == GT_COMMA) + { + chars += cDependsIR(comp, child, first); + } + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree dependencies based on comma nodes for linear IR form + */ + +int dDependsIR(GenTree* comma) +{ + int chars = 0; + bool first = TRUE; + + chars = cDependsIR(GetTlsCompiler(), comma, &first); + + return chars; +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree node in linear IR form + */ + +void cNodeIR(Compiler* comp, GenTree* tree) +{ + bool foldLeafs = comp->dumpIRNoLeafs; + bool foldIndirs = comp->dumpIRDataflow; + bool foldLists = comp->dumpIRNoLists; + bool dataflowView = comp->dumpIRDataflow; + bool dumpTypes = comp->dumpIRTypes; + bool dumpValnums = comp->dumpIRValnums; + bool noStmts =comp->dumpIRNoStmts; + genTreeOps op = tree->OperGet(); + unsigned childCount = tree->NumChildren(); + GenTree* 
child; + + // What are we skipping? + + if (tree->OperIsLeaf()) + { + if (foldLeafs) + { + return; + } + } + else if (op == GT_IND) + { + if (foldIndirs) + { + return; + } + } + else if (op == GT_LIST) + { + if (foldLists) + { + return; + } + } + else if (op == GT_STMT) + { + if (noStmts) + { + if (dataflowView) + { + child = tree->GetChild(0); + if (child->gtOper != GT_COMMA) + { + return; + } + } + else + { + return; + } + } + } + else if (op == GT_COMMA) + { + if (dataflowView) + { + return; + } + } + + // Dump tree id or dataflow destination. + + int chars = 0; + + // if (comp->compRationalIRForm) + // { + // chars += printf("R"); + // } + + chars += printf(" "); + if (dataflowView && tree->OperIsAssignment()) + { + child = tree->GetChild(0); + chars += cOperandIR(comp, child); + } + else if (dataflowView && ((op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD))) + { + chars += cLeafIR(comp, tree); + } + else if (dataflowView && (op == GT_STOREIND)) + { + child = tree->GetChild(0); + chars += printf("["); + chars += cOperandIR(comp, child); + chars += printf("]"); + if (dumpTypes) + { + chars += cTreeTypeIR(comp, tree); + } + if (dumpValnums) + { + chars += cValNumIR(comp, tree); + } + } + else + { + chars += printf("t%d", tree->gtTreeID); + if (comp->dumpIRRegs) + { + regNumber regNum = tree->GetReg(); + if (regNum != REG_NA) + { + chars += printf("(%s)", getRegName(regNum)); + } + } + if (dumpTypes) + { + chars += cTreeTypeIR(comp, tree); + } + if (dumpValnums) + { + chars += cValNumIR(comp, tree); + } + } + + // Dump opcode and tree ID if need in dataflow view. + + chars += dTabStopIR(chars, COLUMN_OPCODE); + const char * opName = tree->OpName(op); + chars += printf(" = %s", opName); + + if (dataflowView) + { + if (tree->OperIsAssignment() + || (op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD) || (op == GT_STOREIND)) + { + chars += printf("(t%d)", tree->gtTreeID); + } + } + + // Dump modifiers for opcodes to help with readability + + if (op == GT_CALL) + { + GenTreeCall * call = tree->AsCall(); + + if (call->gtCallType == CT_USER_FUNC) + { + if (call->IsVirtualStub()) + { + chars += printf(":VS"); + } + else if (call->IsVirtualVtable()) + { + chars += printf(":VT"); + } + else if (call->IsVirtual()) + { + chars += printf(":V"); + } + } + else if (call->gtCallType == CT_HELPER) + { + chars += printf(":H"); + } + else if (call->gtCallType == CT_INDIRECT) + { + chars += printf(":I"); + } + else if (call->IsUnmanaged()) + { + chars += printf(":U"); + } + else + { + if (call->IsVirtualStub()) + { + chars += printf(":XVS"); + } + else if (call->IsVirtualVtable()) + { + chars += printf(":XVT"); + } + else + { + chars += printf(":?"); + } + } + + if (call->IsUnmanaged()) + { + if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL) + { + chars += printf(":T"); + } + } + + if (tree->gtFlags & GTF_CALL_NULLCHECK) + { + chars += printf(":N"); + } + } + else if (op == GT_INTRINSIC) + { + CorInfoIntrinsics intrin = tree->gtIntrinsic.gtIntrinsicId; + + chars += printf(":"); + switch (intrin) + { + case CORINFO_INTRINSIC_Sin: + chars += printf("Sin"); + break; + case CORINFO_INTRINSIC_Cos: + chars += printf("Cos"); + break; + case CORINFO_INTRINSIC_Sqrt: + chars += printf("Sqrt"); + break; + case CORINFO_INTRINSIC_Cosh: + chars += printf("Cosh"); + break; + case CORINFO_INTRINSIC_Sinh: + chars += printf("Sinh"); + break; + case CORINFO_INTRINSIC_Tan: + chars += printf("Tan"); + break; + case CORINFO_INTRINSIC_Tanh: + chars += printf("Tanh"); + break; + case CORINFO_INTRINSIC_Asin: + chars += 
printf("Asin"); + break; + case CORINFO_INTRINSIC_Acos: + chars += printf("Acos"); + break; + case CORINFO_INTRINSIC_Atan: + chars += printf("Atan"); + break; + case CORINFO_INTRINSIC_Atan2: + chars += printf("Atan2"); + break; + case CORINFO_INTRINSIC_Log10: + chars += printf("Log10"); + break; + case CORINFO_INTRINSIC_Pow: + chars += printf("Pow"); + break; + case CORINFO_INTRINSIC_Exp: + chars += printf("Exp"); + break; + case CORINFO_INTRINSIC_Ceiling: + chars += printf("Ceiling"); + break; + case CORINFO_INTRINSIC_Floor: + chars += printf("Floor"); + break; + default: + chars += printf("unknown(%d)", intrin); + break; + } + } + + // Dump operands. + + chars += dTabStopIR(chars, COLUMN_OPERANDS); + + // Dump operator specific fields as operands + + switch (op) + { + default: + break; + case GT_FIELD: + + { + const char * className = NULL; + const char * fieldName = comp->eeGetFieldName(tree->gtField.gtFldHnd, &className); + + chars += printf(" %s.%s", className, fieldName); + } + break; + + case GT_CALL: + + if (tree->gtCall.gtCallType != CT_INDIRECT) + { + const char * methodName; + const char * className; + + methodName = comp->eeGetMethodName(tree->gtCall.gtCallMethHnd, &className); + + chars += printf(" %s.%s", className, methodName); + } + break; + + case GT_STORE_LCL_VAR: + case GT_STORE_LCL_FLD: + + if (!dataflowView) + { + chars += printf(" "); + chars += cLeafIR(comp, tree); + } + break; + + case GT_STORE_CLS_VAR: + + chars += printf(" ???"); + break; + + case GT_LEA: + + GenTreeAddrMode * lea = tree->AsAddrMode(); + GenTree *base = lea->Base(); + GenTree *index = lea->Index(); + unsigned scale = lea->gtScale; + unsigned offset = lea->gtOffset; + + chars += printf(" ["); + if (base != NULL) + { + chars += cOperandIR(comp, base); + } + if (index != NULL) + { + if (base != NULL) + { + chars += printf("+"); + } + chars += cOperandIR(comp, index); + if (scale > 1) + { + chars += printf("*%u", scale); + } + } + if ((offset != 0) || ((base == NULL) && (index == NULL))) + { + if ((base != NULL) || (index != NULL)) + { + chars += printf("+"); + } + chars += printf("%u", offset); + } + chars += printf("]"); + break; + } + + // Dump operands. + + if (tree->OperIsLeaf()) + { + chars += printf(" "); + chars += cLeafIR(comp, tree); + } + else if (op == GT_LEA) + { + // Already dumped it above. 
+ } + else if (op == GT_PHI) + { + if (tree->gtOp.gtOp1 != NULL) + { + bool first = true; + for (GenTreeArgList* args = tree->gtOp.gtOp1->AsArgList(); args != NULL; args = args->Rest()) + { + child = args->Current(); + if (!first) + { + chars += printf(","); + } + first = false; + chars += printf(" "); + chars += cOperandIR(comp, child); + } + } + } + else + { + bool hasComma = false; + bool first = true; + int operandChars = 0; + for (unsigned childIndex = 0; childIndex < childCount; childIndex++) + { + child = tree->GetChild(childIndex); + if (child == NULL) + { + continue; + } + + if (child->gtOper == GT_COMMA) + { + hasComma = true; + } + + if (dataflowView && (childIndex == 0)) + { + if ((op == GT_ASG) || (op == GT_STOREIND)) + { + continue; + } + } + + if (!first) + { + chars += printf(","); + } + + bool isList = (child->gtOper == GT_LIST); + if (!isList || !foldLists) + { + if (foldLeafs && (child->gtOper == GT_ARGPLACE)) + { + continue; + } + chars += printf(" "); + operandChars = cOperandIR(comp, child); + chars += operandChars; + if (operandChars > 0) + first = false; + } + else + { + assert(isList); + chars += printf(" "); + operandChars = cOperandIR(comp, child); + chars += operandChars; + if (operandChars > 0) + first = false; + } + + } + + if (dataflowView && hasComma) + { + chars += printf(", DEPS("); + first = true; + for (unsigned childIndex = 0; childIndex < childCount; childIndex++) + { + child = tree->GetChild(childIndex); + if (child->gtOper == GT_COMMA) + { + chars += cDependsIR(comp, child, &first); + } + } + chars += printf(")"); + } + } + + // Dump kinds, flags, costs + + if (comp->dumpIRKinds || comp->dumpIRFlags || comp->dumpIRCosts) + { + chars += dTabStopIR(chars, COLUMN_KINDS); + chars += printf(";"); + if (comp->dumpIRKinds) + { + chars += printf(" "); + chars += cTreeKindsIR(comp, tree); + } + if (comp->dumpIRFlags && (tree->gtFlags != 0)) + { + if (comp->dumpIRKinds) + { + chars += dTabStopIR(chars, COLUMN_FLAGS); + } + else + { + chars += printf(" "); + } + chars += cTreeFlagsIR(comp, tree); + } + if (comp->dumpIRCosts && (tree->gtCostsInitialized)) + { + chars += printf(" CostEx=%d, CostSz=%d", tree->GetCostEx(), tree->GetCostSz()); + } + } + + printf("\n"); +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree in linear IR form + */ + +void cTreeIR(Compiler* comp, GenTree* tree) +{ + bool foldLeafs = comp->dumpIRNoLeafs; + bool foldIndirs = comp->dumpIRDataflow; + bool foldLists = comp->dumpIRNoLists; + bool dataflowView = comp->dumpIRDataflow; + bool dumpTypes = comp->dumpIRTypes; + bool dumpValnums = comp->dumpIRValnums; + bool noStmts =comp->dumpIRNoStmts; + genTreeOps op = tree->OperGet(); + unsigned childCount = tree->NumChildren(); + GenTree* child; + + // Recurse and dump trees that this node depends on. + + if (tree->OperIsLeaf()) + { + } + else if (tree->OperIsBinary() && tree->IsReverseOp()) + { + child = tree->GetChild(1); + cTreeIR(comp, child); + child = tree->GetChild(0); + cTreeIR(comp, child); + } + else if (op == GT_PHI) + { + // Don't recurse. 
+ } + else + { + assert(!tree->IsReverseOp()); + for (unsigned childIndex = 0; childIndex < childCount; childIndex++) + { + child = tree->GetChild(childIndex); + if (child != NULL) + { + cTreeIR(comp, child); + } + } + } + + cNodeIR(comp, tree); +} + +/***************************************************************************** + * + * COMPLUS_JitDumpIR support - dump out tree in linear IR form + */ + +void dTreeIR(GenTree* tree) +{ + cTreeIR(GetTlsCompiler(), tree); +} + #endif // DEBUG #if VARSET_COUNTOPS diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 91ebbde4a4..fdedaaae8f 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -42,7 +42,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "tinyarray.h" #include "valuenum.h" #include "reglist.h" - +#include "jittelemetry.h" #ifdef LATE_DISASM #include "disasm.h" #endif @@ -124,6 +124,8 @@ var_types genActualType (var_types type); var_types genUnsignedType(var_types type); var_types genSignedType (var_types type); +unsigned ReinterpretHexAsDecimal(unsigned); + /*****************************************************************************/ #ifdef FEATURE_SIMD @@ -169,7 +171,13 @@ struct VarScopeDsc // This is the location of a definition. struct DefLoc { BasicBlock* m_blk; - // We'll need more precise info later... + GenTreePtr m_tree; + + DefLoc() : + m_blk(nullptr), + m_tree(nullptr) + { + } }; // This class encapsulates all info about a local variable that may vary for different SSA names @@ -179,6 +187,8 @@ class LclSsaVarDsc public: ValueNumPair m_vnPair; DefLoc m_defLoc; + + LclSsaVarDsc() {} }; typedef ExpandArray<LclSsaVarDsc> PerSsaArray; @@ -225,9 +235,6 @@ public: unsigned char lvHasLdAddrOp:1; // has ldloca or ldarga opcode on this local. unsigned char lvStackByref :1; // This is a compiler temporary of TYP_BYREF that is known to point into our local stack frame. -#ifdef DEBUG - unsigned char lvSafeAddrTaken :1; // variable has its address taken, but it's consumed in the next instruction. -#endif unsigned char lvArgWrite :1; // variable is a parameter and STARG was used on it unsigned char lvIsTemp :1; // Short-lifetime compiler temp #if OPT_BOOL_OPS @@ -257,7 +264,6 @@ public: unsigned char lvQuirkToLong :1; // Quirk to allocate this LclVar as a 64-bit long #endif #ifdef DEBUG - unsigned char lvDblWasInt :1; // Was this TYP_DOUBLE originally a TYP_INT? unsigned char lvKeepType :1; // Don't change the type of this variable unsigned char lvNoLclFldStress :1;// Can't apply local field stress on this one #endif @@ -269,12 +275,11 @@ public: unsigned char lvOverlappingFields :1; // True when we have a struct with possibly overlapping fields unsigned char lvContainsHoles :1; // True when we have a promoted struct that contains holes unsigned char lvCustomLayout :1; // True when this struct has "CustomLayout" -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - unsigned char lvDontPromote:1; // Should struct promoter consider this variable for promotion? +#if FEATURE_MULTIREG_STRUCTS + unsigned char lvDontPromote:1; // Should struct promotion consider this local variable for promotion? #endif - #ifdef _TARGET_ARM_ - unsigned char lvIsHfaRegArg :1; // Is this argument variable holding a HFA register argument. + unsigned char lvIsHfaRegArg:1; // Is this argument variable holding a HFA register argument. unsigned char lvHfaTypeIsFloat:1; // Is the HFA type float or double? 
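The lvIsHfaRegArg/lvHfaTypeIsFloat bits above concern HFAs, homogeneous floating-point aggregates in the ARM ABIs: structs built from one to four fields of a single floating-point type, which are passed in floating-point registers rather than integer ones. Roughly:

    // Rough examples of what does and does not qualify as an HFA.
    struct Vec2      { float  x, y; };          // HFA of two floats
    struct Quad      { double a, b, c, d; };    // HFA of four doubles
    struct NotAnHfa1 { float  x; double y; };   // mixed element types - not an HFA
    struct NotAnHfa2 { float  v[5]; };          // five elements - too many for an HFA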
#endif @@ -289,7 +294,10 @@ public: #ifndef LEGACY_BACKEND unsigned char lvLRACandidate :1; // Tracked for linear scan register allocation purposes #endif // !LEGACY_BACKEND + #ifdef FEATURE_SIMD + // Note that both SIMD vector args and locals are marked as lvSIMDType = true, but the + // type of an arg node is TYP_BYREF and a local node is TYP_SIMD*. unsigned char lvSIMDType :1; // This is a SIMD struct unsigned char lvUsedInSIMDIntrinsic :1; // This tells lclvar is used for simd intrinsic #endif // FEATURE_SIMD @@ -308,7 +316,7 @@ public: unsigned char lvFldOffset; unsigned char lvFldOrdinal; -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_STRUCT_ARGS regNumber lvRegNumForSlot(unsigned slotNum) { if (slotNum == 0) @@ -326,7 +334,7 @@ public: unreached(); } -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // FEATURE_MULTIREG_STRUCT_ARGS private: @@ -340,9 +348,10 @@ private: regNumberSmall _lvArgReg; // The register in which this argument is passed. -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_STRUCT_ARGS regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register. -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Note this is defined but not used by ARM32 +#endif // FEATURE_MULTIREG_STRUCT_ARGS #ifndef LEGACY_BACKEND union @@ -421,9 +430,9 @@ public: assert(_lvArgReg == reg); } -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_STRUCT_ARGS __declspec(property(get = GetOtherArgReg, put = SetOtherArgReg)) - regNumber lvOtherArgReg; + regNumber lvOtherArgReg; regNumber GetOtherArgReg() const { @@ -435,7 +444,7 @@ public: _lvOtherArgReg = (regNumberSmall)reg; assert(_lvOtherArgReg == reg); } -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // FEATURE_MULTIREG_STRUCT_ARGS #ifdef FEATURE_SIMD // Is this is a SIMD struct? @@ -582,7 +591,7 @@ public: // Here, the "struct(U)" shows that the "V03 loc2" variable is unused. Not shown is that V03 // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree. - assert((lvType == TYP_STRUCT) || + assert(varTypeIsStruct(lvType) || (lvType == TYP_BLK) || (lvPromoted && lvUnusedStruct)); return (unsigned)(roundUp(lvExactSize, sizeof(void*))); @@ -604,7 +613,6 @@ public: #if ASSERTION_PROP BlockSet lvRefBlks; // Set of blocks that contain refs GenTreePtr lvDefStmt; // Pointer to the statement with the single definition - EXPSET_TP lvAssertionDep; // Assertions that depend on us (i.e to this var) void lvaDisqualifyVar(); // Call to disqualify a local variable from use in optAddCopies #endif var_types TypeGet() const { return (var_types) lvType; } @@ -954,11 +962,15 @@ struct ArrayInfo // partition a compilation. enum Phases { -#define CompPhaseNameMacro(enum_nm, string_nm, hasChildren, parent) enum_nm, +#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) enum_nm, #include "compphases.h" PHASE_NUMBER_OF }; +extern const char* PhaseNames[]; +extern const char* PhaseEnums[]; +extern const LPCWSTR PhaseShortNames[]; + //--------------------------------------------------------------- // Compilation time. 
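compphases.h is an X-macro list that is expanded several times to build the Phases enum and the parallel PhaseNames/PhaseEnums/PhaseShortNames tables declared above; this change threads a new short_nm column through the CompPhaseNameMacro signature. A self-contained sketch of the pattern with made-up entries (the real code re-includes compphases.h instead of using a helper macro):

    #include <cstdio>

    // Two illustrative phase entries; the real list and its data live in compphases.h.
    #define ILLUSTRATIVE_PHASES(M)                                  \
        M(PHASE_PRE_IMPORT,  "Pre-import",  L"PRE-IMP", false, -1)  \
        M(PHASE_IMPORTATION, "Importation", L"IMPORT",  false, -1)

    enum Phases
    {
    #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) enum_nm,
        ILLUSTRATIVE_PHASES(CompPhaseNameMacro)
    #undef CompPhaseNameMacro
        PHASE_NUMBER_OF
    };

    const char* PhaseNames[] =
    {
    #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) string_nm,
        ILLUSTRATIVE_PHASES(CompPhaseNameMacro)
    #undef CompPhaseNameMacro
    };

    const wchar_t* PhaseShortNames[] =
    {
    #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) short_nm,
        ILLUSTRATIVE_PHASES(CompPhaseNameMacro)
    #undef CompPhaseNameMacro
    };

    int main()
    {
        printf("%d phases, first is %s\n", (int)PHASE_NUMBER_OF, PhaseNames[0]);
        return 0;
    }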
// @@ -1189,7 +1201,7 @@ struct fgArgTabEntry fgArgTabEntry() { otherRegNum = REG_NA; - isStruct = false; // is this a struct arg + isStruct = false; // is this a struct arg } #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) @@ -1220,8 +1232,9 @@ struct fgArgTabEntry bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - regNumber otherRegNum; // The (second) register to use when passing this argument. - bool isStruct; // is this a struct arg. + regNumber otherRegNum; // The (second) register to use when passing this argument. + bool isStruct; // is this a struct arg + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) @@ -1450,6 +1463,26 @@ public: #ifdef DEBUG bool verbose; + bool dumpIR; + bool dumpIRNodes; + bool dumpIRTypes; + bool dumpIRKinds; + bool dumpIRLocals; + bool dumpIRRegs; + bool dumpIRSsa; + bool dumpIRValnums; + bool dumpIRCosts; + bool dumpIRFlags; + bool dumpIRNoLists; + bool dumpIRNoLeafs; + bool dumpIRNoStmts; + bool dumpIRTrees; + bool dumpIRLinear; + bool dumpIRDataflow; + bool dumpIRBlockHeaders; + bool dumpIRExit; + LPCWSTR dumpIRPhase; + LPCWSTR dumpIRFormat; bool verboseTrees; bool shouldUseVerboseTrees(); bool asciiTrees; // If true, dump trees using only ASCII characters @@ -1464,9 +1497,12 @@ public: DWORD expensiveDebugCheckLevel; #endif -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_STRUCTS CORINFO_CLASS_HANDLE GetStructClassHandle(GenTreePtr tree); -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif +#if FEATURE_MULTIREG_STRUCT_RET + GenTreePtr impAssignStructClassToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass); +#endif #ifdef _TARGET_ARM_ @@ -1868,6 +1904,8 @@ protected: GenTreePtr src, GenTreePtr size, bool volatil); public: + GenTreeLdObj* gtNewLdObjNode (CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr); + GenTreeBlkOp* gtNewCpObjNode (GenTreePtr dst, GenTreePtr src, CORINFO_CLASS_HANDLE structHnd, bool volatil); @@ -1981,7 +2019,16 @@ public: GenTreePtr gtCloneExpr (GenTree * tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, - int varVal = 0); + int varVal = 0); + + GenTreePtr gtReplaceTree (GenTreePtr stmt, + GenTreePtr tree, + GenTreePtr replacementTree); + + void gtUpdateSideEffects(GenTreePtr tree, + unsigned oldGtFlags, + unsigned newGtFlags); + // Returns "true" iff the complexity (not formally defined, but first interpretation // is #of nodes in subtree) of "tree" is greater than "limit". // (This is somewhat redundant with the "gtCostEx/gtCostSz" fields, but can be used @@ -2049,6 +2096,9 @@ public: // is such an object pointer. bool gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd); + // Return true if call is a recursive call; return false otherwise. + bool gtIsRecursiveCall(GenTreeCall * call) { return (call->gtCallMethHnd == info.compMethodHnd); } + //------------------------------------------------------------------------- GenTreePtr gtFoldExpr (GenTreePtr tree); @@ -2067,11 +2117,15 @@ public: GenTreePtr gtFoldExprCompare(GenTreePtr tree); //------------------------------------------------------------------------- + // Get the handle, if any. + CORINFO_CLASS_HANDLE gtGetStructHandleIfPresent (GenTreePtr tree); + // Get the handle, and assert if not found. 
+ CORINFO_CLASS_HANDLE gtGetStructHandle (GenTreePtr tree); + + //------------------------------------------------------------------------- // Functions to display the trees #ifdef DEBUG - bool gtDblWasInt (GenTree * tree) const; - void gtDispNode (GenTreePtr tree, IndentStack* indentStack, __in_z const char* msg); @@ -2093,6 +2147,10 @@ public: IndentStack* indentStack = nullptr, __in_opt const char* msg = nullptr, bool topOnly = false); + void gtGetLclVarNameInfo(unsigned lclNum, + const char** ilKindOut, + const char** ilNameOut, + unsigned* ilNumOut); int gtGetLclVarName (unsigned lclNum, char* buf, unsigned buf_remaining); @@ -2484,9 +2542,6 @@ public : void lvaAdjustRefCnts (); #ifdef DEBUG - static fgWalkPreFn lvaStressFloatLclsCB; - void lvaStressFloatLcls (); - struct lvaStressLclFldArgs { Compiler* m_pCompiler; @@ -2513,6 +2568,23 @@ public : BOOL lvaIsOriginalThisReadOnly (); // return TRUE if there is no place in the code // that writes to arg0 + // Struct parameters that are passed by reference are marked as both lvIsParam and lvIsTemp + // (this is an overload of lvIsTemp because there are no temp parameters). + // For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference. + // For ARM64, this is structs larger than 16 bytes that are passed by reference. + bool lvaIsImplicitByRefLocal(unsigned varNum) + { +#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) + LclVarDsc* varDsc = &(lvaTable[varNum]); + if (varDsc->lvIsParam && varDsc->lvIsTemp) + { + assert((varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_BYREF)); + return true; + } +#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) + return false; + } + // If the class is a TYP_STRUCT, get/set a class handle describing it CORINFO_CLASS_HANDLE lvaGetStruct (unsigned varNum); @@ -2544,6 +2616,14 @@ public : bool customLayout; unsigned char fieldCnt; lvaStructFieldInfo fields[MAX_NumOfFieldsInPromotableStruct]; + + lvaStructPromotionInfo() + : typeHnd (0) + , canPromote (false) + , requiresScratchVar(false) + , containsHoles (false) + , customLayout (false) + {} }; static int __cdecl lvaFieldOffsetCmp(const void * field1, const void * field2); @@ -2828,10 +2908,16 @@ public: CORINFO_CLASS_HANDLE structHnd, unsigned curLevel, bool willDeref); - GenTreePtr impNormStructVal (GenTreePtr structVal, + + var_types impNormStructType (CORINFO_CLASS_HANDLE structHnd, + BYTE* gcLayout = nullptr, + unsigned* numGCVars = nullptr, + var_types* simdBaseType = nullptr); + + GenTreePtr impNormStructVal (GenTreePtr structVal, CORINFO_CLASS_HANDLE structHnd, - unsigned curLevel, - bool forceNormalization = false); + unsigned curLevel, + bool forceNormalization = false); GenTreePtr impTokenToHandle (CORINFO_RESOLVED_TOKEN * pResolvedToken, BOOL *pRuntimeLookup = NULL, @@ -2869,7 +2955,13 @@ public: bool VarTypeIsMultiByteAndCanEnreg(var_types type, CORINFO_CLASS_HANDLE typeClass, - unsigned *typeSize); + unsigned *typeSize, + bool forReturn); + + static bool IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId); + static bool IsTargetIntrinsic(CorInfoIntrinsics intrinsicId); + static bool IsMathIntrinsic(CorInfoIntrinsics intrinsicId); + static bool IsMathIntrinsic(GenTreePtr tree); private: @@ -3091,10 +3183,6 @@ private: void impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr op, CORINFO_CLASS_HANDLE hClass); #endif -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - GenTreePtr impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass); -#endif - // A free list of 
linked list nodes used to represent to-do stacks of basic blocks. struct BlockListNode { @@ -3200,12 +3288,14 @@ private: OPCODE curOpcode, const BYTE *codeAddrOfNextOpcode, const BYTE *codeEnd, + bool isRecursive, bool *IsCallPopRet = nullptr); bool impIsImplicitTailCallCandidate(OPCODE curOpcode, const BYTE *codeAddrOfNextOpcode, const BYTE *codeEnd, - int prefixFlags); + int prefixFlags, + bool isRecursive); /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -3496,6 +3586,8 @@ public : GenTreeStmt* fgNewStmtFromTree (GenTreePtr tree, BasicBlock* block); GenTreeStmt* fgNewStmtFromTree (GenTreePtr tree, IL_OFFSETX offs); + GenTreePtr fgGetLastTopLevelStmt(BasicBlock *block); + GenTreePtr fgGetTopLevelQmark (GenTreePtr expr, GenTreePtr* ppDst = NULL); void fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt); void fgExpandQmarkStmt (BasicBlock* block, GenTreePtr expr); @@ -3752,9 +3844,15 @@ public : // an assignment. void fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd = false); + // Does value-numbering for a block assignment. + void fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd); + // Does value-numbering for a cast tree. void fgValueNumberCastTree(GenTreePtr tree); + // Does value-numbering for an intrinsic tree. + void fgValueNumberIntrinsic(GenTreePtr tree); + // Does value-numbering for a call. We interpret some helper calls. void fgValueNumberCall(GenTreeCall* call); @@ -3787,7 +3885,7 @@ public : { if (elemStructType != nullptr) { - assert(elemTyp == TYP_STRUCT || elemTyp == TYP_REF || elemTyp == TYP_BYREF || varTypeIsIntegral(elemTyp)); + assert(varTypeIsStruct(elemTyp) || elemTyp == TYP_REF || elemTyp == TYP_BYREF || varTypeIsIntegral(elemTyp)); assert((size_t(elemStructType) & 0x1) == 0x0); // Make sure the encoding below is valid. return elemStructType; } @@ -3813,6 +3911,13 @@ public : } } + // Get the "primitive" type, if any, that is used to pass or return + // values of the given struct type. + var_types argOrReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, bool forReturn); + + // Slightly optimized version of the above where we've already computed the size, + // so as to avoid a repeated JIT/EE interface call. + var_types argOrReturnTypeForStruct(unsigned size, CORINFO_CLASS_HANDLE clsHnd, bool forReturn); #ifdef DEBUG // Print a representation of "vnp" or "vn" on standard output. @@ -4179,12 +4284,12 @@ public: unsigned fgGetCodeEstimate(BasicBlock * block); -#if XML_FLOWGRAPHS +#if DUMP_FLOWGRAPHS const char * fgProcessEscapes(const char * nameIn, escapeMapping_t *map); - FILE * fgOpenXmlFlowGraphFile(bool * wbDontClose); - bool fgDumpXmlFlowGraph(); + FILE * fgOpenFlowGraphFile(bool * wbDontClose, Phases phase, LPCWSTR type); + bool fgDumpFlowGraph(Phases phase); -#endif // XML_FLOWGRAPHS +#endif // DUMP_FLOWGRAPHS #ifdef DEBUG void fgDispDoms (); @@ -4434,10 +4539,15 @@ private: void fgFixupIfCallArg(ArrayStack<GenTree *> *parentStack, GenTree *oldChild, GenTree *newChild); + + void fgFixupArgTabEntryPtr(GenTreePtr parentCall, + GenTreePtr oldArg, + GenTreePtr newArg); + // Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node. 
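The bitwise-rotation recognition mentioned in the comment above targets the usual shift/OR idiom; source of roughly the following shape is presumably what gets collapsed into a single GT_ROL node (the mirrored form, x >> n | x << (32 - n), corresponds to GT_ROR):

    #include <cstdio>
    #include <cstdint>

    // The classic rotate-left idiom built from two opposing shifts and an OR.
    // The shift counts are masked so the n == 0 case stays well defined.
    static uint32_t RotateLeft32(uint32_t x, unsigned n)
    {
        n &= 31;
        return (x << n) | (x >> ((32 - n) & 31));
    }

    int main()
    {
        printf("%08x\n", RotateLeft32(0x80000001u, 1));  // prints 00000003
        return 0;
    }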
GenTreePtr fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree); bool fgOperIsBitwiseRotationRoot(genTreeOps oper); - + //-------- Determine the order in which the trees will be evaluated ------- unsigned fgTreeSeqNum; @@ -4552,6 +4662,9 @@ private: GenTreePtr fgMorphField (GenTreePtr tree, MorphAddrContext* mac); bool fgCanFastTailCall (GenTreeCall* call); void fgMorphTailCall (GenTreeCall* call); + void fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall); + GenTreePtr fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg, fgArgTabEntryPtr argTabEntry, BasicBlock* block, IL_OFFSETX callILOffset, + GenTreePtr tmpAssignmentInsertionPoint, GenTreePtr paramAssignmentInsertionPoint); static int fgEstimateCallStackSize(GenTreeCall* call); GenTreePtr fgMorphCall (GenTreeCall* call); GenTreePtr fgMorphCallInline (GenTreePtr call); @@ -4905,6 +5018,8 @@ protected : CALLINT_ALL, // kills everything (normal method call) }; +public: + // A "LoopDsc" describes a ("natural") loop. We (currently) require the body of a loop to be a contiguous (in bbNext order) // sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered" in bbNext order; // we use comparisons on the bbNum to decide order.) @@ -5076,10 +5191,18 @@ protected : }; +protected : + bool fgMightHaveLoop(); // returns true if there are any backedges bool fgHasLoops; // True if this method has any loops, set in fgComputeReachability + +public : + LoopDsc optLoopTable[MAX_LOOP_NUM]; // loop descriptor table unsigned char optLoopCount; // number of tracked loops + +protected : + unsigned optCallCount; // number of calls made in the method unsigned optIndirectCallCount; // number of virtual, interface and indirect calls made in the method unsigned optNativeCallCount; // number of Pinvoke/Native calls made in the method @@ -5236,7 +5359,7 @@ protected : // static const unsigned MAX_CSE_CNT = EXPSET_SZ; - static const int MIN_CSE_COST = IND_COST_EX; + static const int MIN_CSE_COST = 2; /* Generic list of nodes - used by the CSE logic */ @@ -5418,17 +5541,73 @@ public: int optCopyProp_LclVarScore(LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc, bool preferOp2); void optVnCopyProp(); + /************************************************************************** + * Early value propagation + *************************************************************************/ + struct SSAName + { + unsigned m_lvNum; + unsigned m_ssaNum; + + SSAName(unsigned lvNum, unsigned ssaNum) : + m_lvNum(lvNum), + m_ssaNum(ssaNum) + { + } + + static unsigned GetHashCode(SSAName ssaNm) + { + return (ssaNm.m_lvNum << 16) | (ssaNm.m_ssaNum); + } + + static bool Equals(SSAName ssaNm1, SSAName ssaNm2) + { + return (ssaNm1.m_lvNum == ssaNm2.m_lvNum) && (ssaNm1.m_ssaNum == ssaNm2.m_ssaNum); + } + }; + +#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array +#define OMF_HAS_NEWOBJ 0x00800002 // Method contains 'new' of an object type. +#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores. +#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference. + + unsigned optMethodFlags; + + // Recursion bound controls how far we can go backwards tracking for a SSA value. + // No throughput diff was found with backward walk bound between 3-8. 
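SSAName above is a hash key for a (local number, SSA number) pair: the hash packs the local number into the upper 16 bits and the SSA number into the lower 16. A standalone usage sketch, with std::unordered_map standing in for the JIT's hash table and made-up values:

    #include <cstdio>
    #include <unordered_map>

    struct SSAName
    {
        unsigned m_lvNum;
        unsigned m_ssaNum;

        SSAName(unsigned lvNum, unsigned ssaNum) : m_lvNum(lvNum), m_ssaNum(ssaNum) {}

        // Same packing as above: local number in the upper 16 bits, SSA number below.
        static unsigned GetHashCode(SSAName ssaNm)
        {
            return (ssaNm.m_lvNum << 16) | (ssaNm.m_ssaNum);
        }

        static bool Equals(SSAName a, SSAName b)
        {
            return (a.m_lvNum == b.m_lvNum) && (a.m_ssaNum == b.m_ssaNum);
        }
    };

    // Adapters so std::unordered_map can stand in for the JIT's hash table here.
    struct SSANameHash  { size_t operator()(SSAName n) const { return SSAName::GetHashCode(n); } };
    struct SSANameEqual { bool operator()(SSAName a, SSAName b) const { return SSAName::Equals(a, b); } };

    int main()
    {
        std::unordered_map<SSAName, const char*, SSANameHash, SSANameEqual> defs;
        defs[SSAName(3, 2)] = "V03 def #2";   // made-up (lclNum, ssaNum) pair
        printf("%s\n", defs[SSAName(3, 2)]);
        return 0;
    }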
+ static const int optEarlyPropRecurBound = 5; + + enum class optPropKind + { + OPK_INVALID, + OPK_ARRAYLEN, + OPK_OBJ_GETTYPE + }; + + bool impHasArrayRef; + + bool gtIsVtableRef(GenTreePtr tree); + GenTreePtr getArrayLengthFromAllocation(GenTreePtr tree); + GenTreePtr getObjectHandleNodeFromAllocation(GenTreePtr tree); + GenTreePtr optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropKind valueKind, int walkDepth); + GenTreePtr optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind); + bool optEarlyPropRewriteTree(GenTreePtr tree); + bool optDoEarlyPropForBlock(BasicBlock* block); + bool optDoEarlyPropForFunc(); + void optEarlyProp(); + + #if ASSERTION_PROP /************************************************************************** * Value/Assertion propagation *************************************************************************/ public: - // The following is the upper limit on how many assertions we'll keep track - // of during global assertion propagation. - // - static const unsigned MAX_ASSERTION_CNT = EXPSET_SZ; // Data structures for assertion prop + BitVecTraits* apTraits; + ASSERT_TP apFull; + ASSERT_TP apEmpty; + enum optAssertionKind { OAK_INVALID, OAK_EQUAL, OAK_NOT_EQUAL, @@ -5440,6 +5619,7 @@ public: O1K_ARR_BND, O1K_ARRLEN_OPER_BND, O1K_ARRLEN_LOOP_BND, + O1K_CONSTANT_LOOP_BND, O1K_EXACT_TYPE, O1K_SUBTYPE }; @@ -5506,6 +5686,10 @@ public: { return ((assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL) && op1.kind == O1K_ARRLEN_LOOP_BND); } + bool IsConstantBound() + { + return ((assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL) && op1.kind == O1K_CONSTANT_LOOP_BND); + } bool IsBoundsCheckNoThrow() { return ((assertionKind == OAK_NO_THROW) && @@ -5547,12 +5731,11 @@ public: return SCHAR_MIN; case TYP_SHORT: return SHRT_MIN; - case TYP_CHAR: - return SCHAR_MIN; case TYP_INT: return INT_MIN; case TYP_BOOL: case TYP_UBYTE: + case TYP_CHAR: case TYP_USHORT: case TYP_UINT: return 0; @@ -5570,12 +5753,11 @@ public: return SCHAR_MAX; case TYP_SHORT: return SHRT_MAX; - case TYP_CHAR: - return SCHAR_MAX; case TYP_INT: return INT_MAX; case TYP_UBYTE: return UCHAR_MAX; + case TYP_CHAR: case TYP_USHORT: return USHRT_MAX; case TYP_UINT: @@ -5642,8 +5824,11 @@ public: } }; + typedef unsigned short AssertionIndex; + protected: static fgWalkPreFn optAddCopiesCallback; + static fgWalkPreFn optVNAssertionPropCurStmtVisitor; unsigned optAddCopyLclNum; GenTreePtr optAddCopyAsgnNode; @@ -5653,97 +5838,113 @@ protected: #ifdef DEBUG GenTreePtr optAssertionPropCurrentTree; #endif - AssertionDsc optAssertionTabPrivate[MAX_ASSERTION_CNT]; // table that holds info about value assignments - unsigned optAssertionCount; // total number of assertions in the assertion table + AssertionIndex* optComplementaryAssertionMap; + ExpandArray<ASSERT_TP>* optAssertionDep; // table that holds dependent assertions (assertions + // using the value of a local var) for each local var + AssertionDsc* optAssertionTabPrivate; // table that holds info about value assignments + AssertionIndex optAssertionCount; // total number of assertions in the assertion table + AssertionIndex optMaxAssertionCount; public : - unsigned GetAssertionCount() + void optVnNonNullPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree); + fgWalkResult optVNConstantPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree); + GenTreePtr optVNConstantPropOnRelOp(GenTreePtr tree); + GenTreePtr optVNConstantPropOnJTrue(BasicBlock* block, GenTreePtr stmt, GenTreePtr test); + GenTreePtr 
optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree); + GenTreePtr optPrepareTreeForReplacement(GenTreePtr extractTree, GenTreePtr replaceTree); + + AssertionIndex GetAssertionCount() { return optAssertionCount; } - EXPSET_TP* bbJtrueAssertionOut; - typedef SimplerHashTable<ValueNum, SmallPrimitiveKeyFuncs<ValueNum>, EXPSET_TP, DefaultSimplerHashBehavior> ValueNumToAssertsMap; + ASSERT_TP* bbJtrueAssertionOut; + typedef SimplerHashTable<ValueNum, SmallPrimitiveKeyFuncs<ValueNum>, ASSERT_TP, DefaultSimplerHashBehavior> ValueNumToAssertsMap; ValueNumToAssertsMap* optValueNumToAsserts; - static const int NO_ASSERTION_INDEX = 0; + static const AssertionIndex NO_ASSERTION_INDEX = 0; // Assertion prop helpers. - AssertionDsc* optGetAssertion(unsigned assertIndex); - inline EXPSET_TP optGetAssertionBit(unsigned assertIndex) - { - assert((assertIndex > 0) && (assertIndex <= MAX_ASSERTION_CNT)); - return ((EXPSET_TP) 1 << (assertIndex - 1)); - }; + ASSERT_TP& GetAssertionDep(unsigned lclNum); + AssertionDsc* optGetAssertion(AssertionIndex assertIndex); void optAssertionInit(bool isLocalProp); + void optAssertionTraitsInit(AssertionIndex assertionCount); #if LOCAL_ASSERTION_PROP - void optAssertionReset(unsigned limit); - void optAssertionRemove(unsigned index); + void optAssertionReset(AssertionIndex limit); + void optAssertionRemove(AssertionIndex index); #endif // Assertion prop data flow functions. void optAssertionPropMain(); - void optInitAssertionDataflowFlags(EXPSET_TP* jumpDestOut, EXPSET_TP* jumpDestGen); GenTreePtr optVNAssertionPropCurStmt(BasicBlock* block, GenTreePtr stmt); - void optComputeAssertionGen(EXPSET_TP* jumpDestGen); bool optIsTreeKnownIntValue(bool vnBased, GenTreePtr tree, ssize_t* pConstant, unsigned* pIconFlags); + ASSERT_TP* optInitAssertionDataflowFlags(); + ASSERT_TP* optComputeAssertionGen(); // Assertion Gen functions. void optAssertionGen (GenTreePtr tree); - unsigned optAssertionGenPhiDefn(GenTreePtr tree); - unsigned optCreateJTrueArrayAssertion(GenTreePtr tree); - unsigned optAssertionGenJtrue (GenTreePtr tree); - unsigned optCreateJtrueAssertions(GenTreePtr op1, GenTreePtr op2, Compiler::optAssertionKind assertionKind); - unsigned optFindComplementary (unsigned assertionIndex); + AssertionIndex optAssertionGenPhiDefn(GenTreePtr tree); + AssertionIndex optCreateJTrueBoundsAssertion(GenTreePtr tree); + AssertionIndex optAssertionGenJtrue (GenTreePtr tree); + AssertionIndex optCreateJtrueAssertions(GenTreePtr op1, GenTreePtr op2, Compiler::optAssertionKind assertionKind); + AssertionIndex optFindComplementary (AssertionIndex assertionIndex); + void optMapComplementary (AssertionIndex assertionIndex, AssertionIndex index); // Assertion creation functions. 
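optAssertionDep above keeps, for each local, the set of assertion indices whose assertions mention that local, presumably so those assertions can be invalidated when the local is redefined; later hunks in this change maintain the sets with BitVecOps::AddElemD/RemoveElemD at bit position (index - 1). A rough model of that bookkeeping, with std::vector<bool> standing in for ASSERT_TP:

    #include <cstdio>
    #include <vector>

    // Assertion indices are 1-based, so bit (index - 1) tracks assertion #index,
    // matching the AddElemD/RemoveElemD calls elsewhere in this change.
    struct AssertionDeps
    {
        unsigned maxAssertionCount;
        std::vector<std::vector<bool>> depsPerLocal;  // one bit set per local

        AssertionDeps(unsigned localCount, unsigned maxAssertions)
            : maxAssertionCount(maxAssertions),
              depsPerLocal(localCount, std::vector<bool>(maxAssertions, false))
        {
        }

        void AddDependency(unsigned lclNum, unsigned assertionIndex)     // on assertion creation
        {
            depsPerLocal[lclNum][assertionIndex - 1] = true;
        }

        void RemoveDependency(unsigned lclNum, unsigned assertionIndex)  // on assertion removal
        {
            depsPerLocal[lclNum][assertionIndex - 1] = false;
        }

        // When lclNum is redefined, every assertion that mentions it must go away.
        void PrintAssertionsToKill(unsigned lclNum) const
        {
            for (unsigned i = 0; i < maxAssertionCount; i++)
            {
                if (depsPerLocal[lclNum][i])
                {
                    printf("kill assertion #%u\n", i + 1);
                }
            }
        }
    };

    int main()
    {
        AssertionDeps deps(8, 64);      // made-up local and assertion counts
        deps.AddDependency(3, 1);       // e.g. "V03 == 5" is assertion #1
        deps.AddDependency(3, 2);       // e.g. "V03 != null" is assertion #2
        deps.PrintAssertionsToKill(3);  // an assignment to V03 would kill #1 and #2
        return 0;
    }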
- unsigned optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind); - unsigned optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind, AssertionDsc* assertion); - void optCreateComplementaryAssertion(const AssertionDsc& candidateAssertion, GenTreePtr op1, GenTreePtr op2); + AssertionIndex optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind); + AssertionIndex optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind, AssertionDsc* assertion); + void optCreateComplementaryAssertion(AssertionIndex assertionIndex, GenTreePtr op1, GenTreePtr op2); bool optAssertionVnInvolvesNan(AssertionDsc* assertion); - unsigned optAddAssertion (AssertionDsc* assertion); - void optAddVnAssertionMapping(ValueNum vn, const EXPSET_TP& mask); - EXPSET_TP optGetVnMappedAssertions(ValueNum vn); + AssertionIndex optAddAssertion (AssertionDsc* assertion); + void optAddVnAssertionMapping(ValueNum vn, AssertionIndex index); +#ifdef DEBUG + void optPrintVnAssertionMapping(); +#endif + ASSERT_TP optGetVnMappedAssertions(ValueNum vn); // Used for respective assertion propagations. - unsigned optAssertionIsSubrange(GenTreePtr tree, var_types toType, EXPSET_TP assertions); - unsigned optAssertionIsSubtype(GenTreePtr tree, GenTreePtr methodTableArg, EXPSET_TP assertions); - unsigned optAssertionIsNonNullInternal(GenTreePtr op, EXPSET_TP assertions); - bool optAssertionIsNonNull(GenTreePtr op, EXPSET_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(unsigned* pIndex)); + AssertionIndex optAssertionIsSubrange(GenTreePtr tree, var_types toType, ASSERT_VALARG_TP assertions); + AssertionIndex optAssertionIsSubtype(GenTreePtr tree, GenTreePtr methodTableArg, ASSERT_VALARG_TP assertions); + AssertionIndex optAssertionIsNonNullInternal(GenTreePtr op, ASSERT_VALARG_TP assertions); + bool optAssertionIsNonNull(GenTreePtr op, ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(AssertionIndex* pIndex)); // Used for Relop propagation. - unsigned optGlobalAssertionIsEqualOrNotEqual(EXPSET_TP assertions, GenTreePtr op1, GenTreePtr op2); - unsigned optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, EXPSET_TP assertions); + AssertionIndex optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions, GenTreePtr op1, GenTreePtr op2); + AssertionIndex optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions); // Assertion prop for lcl var functions. bool optAssertionProp_LclVarTypeCheck(GenTreePtr tree, LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc); - GenTreePtr optCopyAssertionProp(AssertionDsc* curAssertion, GenTreePtr tree, GenTreePtr stmt DEBUGARG(unsigned index)); - GenTreePtr optConstantAssertionProp(AssertionDsc* curAssertion, const GenTreePtr tree, const GenTreePtr stmt DEBUGARG(unsigned index)); + GenTreePtr optCopyAssertionProp(AssertionDsc* curAssertion, GenTreePtr tree, GenTreePtr stmt DEBUGARG(AssertionIndex index)); + GenTreePtr optConstantAssertionProp(AssertionDsc* curAssertion, const GenTreePtr tree, const GenTreePtr stmt DEBUGARG(AssertionIndex index)); GenTreePtr optVnConstantAssertionProp(const GenTreePtr tree, const GenTreePtr stmt); // Assertion propagation functions. 
- GenTreePtr optAssertionProp(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionProp_LclVar(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionProp_Ind(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionProp_Cast(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionProp_Call(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionProp_RelOp(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionProp_Comma(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionProp_BndsChk(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionPropGlobal_RelOp(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); - GenTreePtr optAssertionPropLocal_RelOp(EXPSET_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionProp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionProp_Cast(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionProp_Comma(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); GenTreePtr optAssertionProp_Update(const GenTreePtr newTree, const GenTreePtr tree, const GenTreePtr stmt); + GenTreePtr optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt); // Implied assertion functions. 
- EXPSET_TP optImpliedAssertions(unsigned assertionIndex, EXPSET_TP activeAssertions); - EXPSET_TP optImpliedByTypeOfAssertions(EXPSET_TP activeAssertions); - EXPSET_TP optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion); - EXPSET_TP optImpliedByConstAssertion(AssertionDsc* curAssertion); + void optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& activeAssertions); + void optImpliedByTypeOfAssertions(ASSERT_TP& activeAssertions); + void optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result); + void optImpliedByConstAssertion(AssertionDsc* curAssertion, ASSERT_TP& result); + + ASSERT_VALRET_TP optNewFullAssertSet(); + ASSERT_VALRET_TP optNewEmptyAssertSet(); #ifdef DEBUG - void optPrintAssertion (AssertionDsc* newAssertion, unsigned assertionIndex=0); - void optDebugCheckAssertions(unsigned index); + void optPrintAssertion(AssertionDsc* newAssertion, AssertionIndex assertionIndex=0); + void optDebugCheckAssertions(AssertionIndex AssertionIndex); #endif void optAddCopies(); #endif // ASSERTION_PROP @@ -5770,7 +5971,6 @@ public : bool optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context); static fgWalkPreFn optCanOptimizeByLoopCloningVisitor; fgWalkResult optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info); - void optOptimizeIndexChecks(); void optObtainLoopCloningOpts(LoopCloneContext* context); bool optIsLoopClonable(unsigned loopInd); @@ -5824,7 +6024,6 @@ protected : bool optIsNoMore (GenTreePtr op1, GenTreePtr op2, int add1 = 0, int add2 = 0); #endif - void optOptimizeInducIndexChecks(unsigned loopNum, arraySizes arrayDesc[]); bool optReachWithoutCall(BasicBlock * srcBB, BasicBlock * dstBB); @@ -6370,6 +6569,9 @@ public : void eeSetEHinfo(unsigned EHnumber, const CORINFO_EH_CLAUSE* clause); + + WORD eeGetRelocTypeHint(void * target); + // ICorStaticInfo wrapper functions #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) @@ -6387,11 +6589,9 @@ public : const wchar_t * eeGetCPString (size_t stringHandle); #endif -#if defined(DEBUG) || INLINE_MATH const char * eeGetFieldName (CORINFO_FIELD_HANDLE fieldHnd, const char ** classNamePtr = NULL); const char* eeGetClassName (CORINFO_CLASS_HANDLE clsHnd); -#endif static CORINFO_METHOD_HANDLE eeFindHelper (unsigned helper); static CorInfoHelpFunc eeGetHelperNum (CORINFO_METHOD_HANDLE method); @@ -6785,9 +6985,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX CORINFO_METHOD_HANDLE SIMDVectorFloat_get_Length; CORINFO_METHOD_HANDLE SIMDVectorFloat_op_Addition; - // Check typeHnd to see if it is a SIMD type, and if so modify the type on the varDsc accordingly. - void checkForSIMDType(LclVarDsc* varDsc, CORINFO_CLASS_HANDLE typeHnd); - // Returns true if the tree corresponds to a TYP_SIMD lcl var. // Note that both SIMD vector args and locals are mared as lvSIMDType = true, but // type of an arg node is TYP_BYREF and a local node is TYP_SIMD or TYP_STRUCT. 
@@ -6796,14 +6993,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return tree->OperIsLocal() && lvaTable[tree->AsLclVarCommon()->gtLclNum].lvSIMDType; } - // Returns true if the type of the tree can be inferred as TYP_SIMD - bool isSIMDType(GenTree* tree) - { - return varTypeIsSIMD(tree) || - (tree->OperGet() == GT_SIMD && tree->TypeGet() == TYP_STRUCT) || - isSIMDTypeLocal(tree); - } - // Returns true if the type of the tree is a byref of TYP_SIMD bool isAddrOfSIMDType(GenTree* tree) { @@ -6812,7 +7001,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX switch(tree->OperGet()) { case GT_ADDR: - return isSIMDType(tree->gtGetOp1()); + return varTypeIsSIMD(tree->gtGetOp1()); case GT_LCL_VAR_ADDR: return lvaTable[tree->AsLclVarCommon()->gtLclNum].lvSIMDType; @@ -6847,21 +7036,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return TYP_UNKNOWN; } -#ifdef RYUJIT_CTPBUILD - // Note that, although the type handles are instance members of Compiler, the - // assembly handle is a static. This is so that we can avoid checking the - // assembly name at every call. - static volatile CORINFO_ASSEMBLY_HANDLE SIMDAssemblyHandle; - bool isSIMDModule(CORINFO_MODULE_HANDLE moduleHnd); -#endif // RYUJIT_CTPBUILD - bool isSIMDClass(CORINFO_CLASS_HANDLE clsHnd) { -#ifdef RYUJIT_CTPBUILD - return isSIMDModule(info.compCompHnd->getClassModule(clsHnd)); -#else // !RYUJIT_CTPBUILD return info.compCompHnd->isInSIMDModule(clsHnd); -#endif // !RYUJIT_CTPBUILD } bool isSIMDClass(typeInfo* pTypeInfo) @@ -6891,10 +7068,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Pops and returns GenTree node from importers type stack. // Normalizes TYP_STRUCT value in case of GT_CALL, GT_RET_EXPR and arg nodes. - GenTreePtr impSIMDPopStack(bool expectAddr = false); + GenTreePtr impSIMDPopStack(var_types type, bool expectAddr = false); // Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index. - GenTreeSIMD* impSIMDGetFixed(var_types baseType, + GenTreeSIMD* impSIMDGetFixed(var_types simdType, + var_types baseType, unsigned simdSize, int index); @@ -6988,7 +7166,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return (simdNode->gtSIMDSize < getSIMDVectorRegisterByteLength()); } - // Get the type for the hardware SIMD vector + // Get the type for the hardware SIMD vector. + // This is the maximum SIMD type supported for this target. var_types getSIMDVectorType() { #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) @@ -7024,7 +7203,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Get preferred alignment of SIMD type given by its type handle int getSIMDTypeAlignment(CORINFO_CLASS_HANDLE typeHnd); - // Get the number of bytes in a SIMD Vector. + // Get the number of bytes in a SIMD Vector for the current compilation. unsigned getSIMDVectorRegisterByteLength() { #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) @@ -7043,15 +7222,30 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif } + // The minimum and maximum possible number of bytes in a SIMD vector. + unsigned int maxSIMDStructBytes() + { + return getSIMDVectorRegisterByteLength(); + } + unsigned int minSIMDStructBytes() + { + return emitTypeSize(TYP_SIMD8); + } + + // (maxPossibleSIMDStructBytes is for use in a context that requires a compile-time constant.) 
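getSIMDVectorRegisterByteLength above reports the vector width for the current compilation (16 bytes without AVX, 32 with it, per maxPossibleSIMDStructBytes just below), and getSIMDTypeForSize further below maps a byte size onto a TYP_SIMD* type. A rough sketch of that size mapping; the 8- and 12-byte cases appear in this hunk, while the 16- and 32-byte cases are assumed by analogy:

    #include <cstdio>

    // Illustrative size-to-SIMD-type mapping; the TYP_SIMD16/TYP_SIMD32 rows are an
    // assumption based on the pattern started by the 8- and 12-byte cases below.
    enum SimdTypeSketch { SIMD_UNDEF, SIMD8, SIMD12, SIMD16, SIMD32 };

    static SimdTypeSketch simdTypeForSize(unsigned size)
    {
        switch (size)
        {
        case 8:  return SIMD8;    // e.g. Vector2 of float
        case 12: return SIMD12;   // e.g. Vector3 of float
        case 16: return SIMD16;   // e.g. Vector4 of float, or Vector<T> without AVX
        case 32: return SIMD32;   // Vector<T> when AVX is usable
        default: return SIMD_UNDEF;
        }
    }

    int main()
    {
        printf("%d\n", (int)simdTypeForSize(12));  // prints 2 (SIMD12) in this sketch
        return 0;
    }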
+#ifdef FEATURE_AVX_SUPPORT + static const unsigned maxPossibleSIMDStructBytes = 32; +#else // !FEATURE_AVX_SUPPORT + static const unsigned maxPossibleSIMDStructBytes = 16; +#endif // !FEATURE_AVX_SUPPORT + // Returns the codegen type for a given SIMD size. - // TODO-Cleanup: Either eliminate this, once we have "plumbed" the SIMD types all the way - // through the JIT, or consider having different TYP_XX for the various sizes. var_types getSIMDTypeForSize(unsigned size) { var_types simdType = TYP_UNDEF; if (size == 8) { - simdType = TYP_DOUBLE; + simdType = TYP_SIMD8; } else if (size == 12) { @@ -7086,6 +7280,29 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif // FEATURE_SIMD +public: + //------------------------------------------------------------------------ + // largestEnregisterableStruct: The size in bytes of the largest struct that can be enregistered. + // + // Notes: It is not guaranteed that the struct of this size or smaller WILL be a + // candidate for enregistration. + + unsigned largestEnregisterableStructSize() + { +#ifdef FEATURE_SIMD + unsigned vectorRegSize = getSIMDVectorRegisterByteLength(); + if (vectorRegSize > TARGET_POINTER_SIZE) + { + return vectorRegSize; + } + else +#endif // FEATURE_SIMD + { + return TARGET_POINTER_SIZE; + } + } +private: + // These routines need not be enclosed under FEATURE_SIMD since lvIsSIMDType() // is defined for both FEATURE_SIMD and !FEATURE_SIMD apropriately. The use // of this routines also avoids the need of #ifdef FEATURE_SIMD specific code. @@ -7162,6 +7379,7 @@ public : JitInlineResult compInlineResult; // The result of importing the inlinee method. + bool compDoAggressiveInlining; // If true, mark every method as CORINFO_FLG_FORCEINLINE bool compJmpOpUsed; // Does the method do a JMP bool compLongUsed; // Does the method use TYP_LONG bool compFloatingPointUsed; // Does the method use TYP_FLOAT or TYP_DOUBLE @@ -7170,6 +7388,8 @@ public : bool compQmarkUsed; // Does the method use GT_QMARK/GT_COLON bool compQmarkRationalized; // Is it allowed to use a GT_QMARK/GT_COLON node. bool compUnsafeCastUsed; // Does the method use LDIND/STIND to cast between scalar/refernce types + + // NOTE: These values are only reliable after // the importing is completely finished. @@ -7254,25 +7474,27 @@ public : bool compMinOpts; unsigned instrCount; unsigned lvRefCount; -# ifdef DEBUG bool compMinOptsIsSet; +# ifdef DEBUG bool compMinOptsIsUsed; - inline void SetMinOpts(bool val) - { assert(!compMinOptsIsUsed); - assert(!compMinOptsIsSet || (compMinOpts == val)); - compMinOpts = val; - compMinOptsIsSet = true; - } inline bool MinOpts() { assert(compMinOptsIsSet); compMinOptsIsUsed = true; return compMinOpts; } + inline bool IsMinOptsSet() { return compMinOptsIsSet; } # else // !DEBUG inline bool MinOpts() { return compMinOpts; } - inline void SetMinOpts(bool val) { compMinOpts = val; } + inline bool IsMinOptsSet() { return compMinOptsIsSet; } # endif // !DEBUG + inline void SetMinOpts(bool val) + { + assert(!compMinOptsIsUsed); + assert(!compMinOptsIsSet || (compMinOpts == val)); + compMinOpts = val; + compMinOptsIsSet = true; + } //true if the CLFLG_* for an optimization is set. 
inline bool OptEnabled(unsigned optFlag) { return !!(compFlags & optFlag); } @@ -7339,6 +7561,13 @@ public : #if RELOC_SUPPORT bool compReloc; #endif + +#ifdef DEBUG +#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) + bool compEnablePCRelAddr; // Whether absolute addr be encoded as PC-rel offset by RyuJIT where possible +#endif +#endif //DEBUG + #ifdef UNIX_AMD64_ABI // This flag is indicating if there is a need to align the frame. // On AMD64-Windows, if there are calls, 4 slots for the outgoing ars are allocated, except for @@ -7348,6 +7577,7 @@ public : // are calls and making sure the frame alignment logic is executed. bool compNeedToAlignFrame; #endif // UNIX_AMD64_ABI + bool compProcedureSplitting; // Separate cold code from hot code bool genFPorder; // Preserve FP order (operations are non-commutative) @@ -7400,6 +7630,8 @@ public : #if FEATURE_TAILCALL_OPT // Whether opportunistic or implicit tail call optimization is enabled. bool compTailCallOpt; + // Whether optimization of transforming a recursive tail call into a loop is enabled. + bool compTailCallLoopOpt; #endif GCPollType compGCPollType; @@ -7455,7 +7687,7 @@ public : /* hide/trivialize other areas */ \ \ STRESS_MODE(REGS) STRESS_MODE(DBL_ALN) STRESS_MODE(LCL_FLDS) STRESS_MODE(UNROLL_LOOPS) \ - STRESS_MODE(MAKE_CSE) STRESS_MODE(ENREG_FP) STRESS_MODE(INLINE) STRESS_MODE(CLONE_EXPR) \ + STRESS_MODE(MAKE_CSE) STRESS_MODE(INLINE) STRESS_MODE(CLONE_EXPR) \ STRESS_MODE(USE_FCOMI) STRESS_MODE(USE_CMOV) STRESS_MODE(FOLD) \ STRESS_MODE(BB_PROFILE) STRESS_MODE(OPT_BOOLS_GC) STRESS_MODE(REMORPH_TREES) \ STRESS_MODE(64RSLT_MUL) STRESS_MODE(DO_WHILE_LOOPS) STRESS_MODE(MIN_OPTS) \ @@ -7687,11 +7919,13 @@ public : #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) // Mask of callee saved float regs on stack. regMaskTP compCalleeFPRegsSavedMask; - +#endif +#ifdef _TARGET_AMD64_ // Quirk for VS debug-launch scenario to work: // Bytes of padding between save-reg area and locals. #define VSQUIRK_STACK_PAD (2*REGSIZE_BYTES) unsigned compVSQuirkStackPaddingNeeded; + bool compQuirkForPPPflag; #endif unsigned compArgSize; // total size of arguments in bytes (including register args (lvIsRegArg)) @@ -7987,7 +8221,9 @@ protected: void *compProfilerMethHnd; // Profiler handle of the method being compiled. Passed as param to ELT callbacks bool compProfilerMethHndIndirected; // Whether compProfilerHandle is pointer to the handle or is an actual handle #endif - +#ifdef _TARGET_AMD64_ + bool compQuirkForPPP(); // Check if this method should be Quirked for the PPP issue +#endif public: // Assumes called as part of process shutdown; does any compiler-specific work associated with that. static void ProcessShutdownWork(ICorStaticInfo* statInfo); @@ -8285,7 +8521,7 @@ public: struct ShadowParamVarInfo { FixedBitVect *assignGroup; // the closure set of variables whose values depend on each other - unsigned shadowCopy; // valid only if mayNeedShadowCopy()==true + unsigned shadowCopy; // Lcl var num, valid only if not set to NO_SHADOW_COPY static bool mayNeedShadowCopy(LclVarDsc* varDsc) { @@ -8392,8 +8628,19 @@ public: // Is the compilation in a full trust context? bool compIsFullTrust(); +#ifndef FEATURE_TRACELOGGING // Should we actually fire the noway assert body and the exception handler? bool compShouldThrowOnNoway(); +#else // FEATURE_TRACELOGGING + // Should we actually fire the noway assert body and the exception handler? 
+ bool compShouldThrowOnNoway(const char* filename, unsigned line); + + // Telemetry instance to use per method compilation. + JitTelemetry compJitTelemetry; + + // Get common parameters that have to be logged with most telemetry data. + void compGetTelemetryDefaults(const char** assemblyName, const char** scopeName, const char** methodName, unsigned* methodHash); +#endif // !FEATURE_TRACELOGGING #ifdef DEBUG private: @@ -8872,12 +9119,28 @@ void cEH(Compiler* comp); void cVar(Compiler* comp, unsigned lclNum); void cVarDsc(Compiler* comp, LclVarDsc* varDsc); void cVars(Compiler* comp); +void cVarsFinal(Compiler* comp); void cBlockPreds(Compiler* comp, BasicBlock* block); void cReach(Compiler* comp); void cDoms(Compiler* comp); void cLiveness(Compiler* comp); void cCVarSet(Compiler* comp, VARSET_VALARG_TP vars); +void cFuncIR(Compiler* comp); +void cBlockIR(Compiler* comp, BasicBlock* block); +void cLoopIR(Compiler* comp, Compiler::LoopDsc* loop); +void cTreeIR(Compiler* comp, GenTree* tree); +int cTreeTypeIR(Compiler *comp, GenTree *tree); +int cTreeKindsIR(Compiler *comp, GenTree *tree); +int cTreeFlagsIR(Compiler *comp, GenTree *tree); +int cOperandIR(Compiler* comp, GenTree* operand); +int cLeafIR(Compiler *comp, GenTree* tree); +int cIndirIR(Compiler *comp, GenTree* tree); +int cListIR(Compiler* comp, GenTree* list); +int cSsaNumIR(Compiler *comp, GenTree *tree); +int cValNumIR(Compiler *comp, GenTree *tree); +int cDependsIR(Compiler* comp, GenTree* comma, bool *first); + void dBlock(BasicBlock* block); void dBlocks(); void dBlocksV(); @@ -8887,6 +9150,7 @@ void dEH(); void dVar(unsigned lclNum); void dVarDsc(LclVarDsc* varDsc); void dVars(); +void dVarsFinal(); void dBlockPreds(BasicBlock* block); void dReach(); void dDoms(); @@ -8896,6 +9160,29 @@ void dCVarSet(VARSET_VALARG_TP vars); void dVarSet(VARSET_VALARG_TP vars); void dRegMask(regMaskTP mask); +void dFuncIR(); +void dBlockIR(BasicBlock* block); +void dTreeIR(GenTree* tree); +void dLoopIR(Compiler::LoopDsc* loop); +void dLoopNumIR(unsigned loopNum); +int dTabStopIR(int curr, int tabstop); +int dTreeTypeIR(GenTree *tree); +int dTreeKindsIR(GenTree *tree); +int dTreeFlagsIR(GenTree *tree); +int dOperandIR(GenTree* operand); +int dLeafIR(GenTree* tree); +int dIndirIR(GenTree* tree); +int dListIR(GenTree* list); +int dSsaNumIR(GenTree *tree); +int dValNumIR(GenTree *tree); +int dDependsIR(GenTree* comma); +void dFormatIR(); + +GenTree* dFindTree(GenTree* tree, unsigned id); +GenTree* dFindTree(unsigned id); +GenTreeStmt* dFindStmt(unsigned id); +BasicBlock* dFindBlock(unsigned bbNum); + #endif // DEBUG #include "compiler.hpp" // All the shared inline functions diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index e9f09eac77..5083baa9b3 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -18,6 +18,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "emit.h" // for emitter::emitAddLabel +#include "bitvec.h" + #include "compilerbitsettraits.hpp" /* @@ -508,6 +510,8 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock * block) inline regNumber genRegNumFromMask(regMaskTP mask) { + assert(mask != 0); // Must have one bit set, so can't have a mask of zero + /* Convert the mask to a register number */ regNumber regNum = (regNumber)genLog2(mask); @@ -635,37 +639,38 @@ bool isRegParamType(var_types type) } #ifdef _TARGET_AMD64_ -/***************************************************************************** - * Returns true if 'type' is a struct of size 1, 2,4 or 8 bytes. 
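The comment being removed above, and the size switch replaced just below, reflect the Windows x64 convention that only 1, 2, 4 and 8 byte structs travel in a single register; the new argOrReturnTypeForStruct presumably hands back the corresponding primitive type for those sizes and TYP_UNKNOWN otherwise. An assumed sketch of that mapping, not the actual implementation (the real function also consults the class handle and the forReturn flag):

    #include <cstdio>

    enum VarTypeSketch { SK_UNKNOWN, SK_BYTE, SK_SHORT, SK_INT, SK_LONG };

    // Assumed Windows x64 mapping: only power-of-two sizes up to 8 bytes can be
    // enregistered as a single primitive; everything else is passed by reference.
    static VarTypeSketch primitiveTypeForStructSize(unsigned size)
    {
        switch (size)
        {
        case 1:  return SK_BYTE;
        case 2:  return SK_SHORT;
        case 4:  return SK_INT;
        case 8:  return SK_LONG;
        default: return SK_UNKNOWN;
        }
    }

    int main()
    {
        printf("%d\n", (int)primitiveTypeForStructSize(4));  // prints 3 (SK_INT) in this sketch
        return 0;
    }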
- * 'typeClass' is the class handle of 'type'. - * Out param 'typeSize' (if non-null) is updated with the size of 'type'. - */ +/*****************************************************************************/ + // Returns true if 'type' is a struct that can be enregistered for call args + // or can be returned by value in multiple registers. + // if 'type' is not a struct the return value will be false. + // + // Arguments: + // type - the basic jit var_type for the item being queried + // typeClass - the handle for the struct when 'type' is TYP_STRUCT + // typeSize - Out param (if non-null) is updated with the size of 'type'. + // forReturn - this is true when we asking about a GT_RETURN context; + // this is false when we are asking about an argument context + // inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type, CORINFO_CLASS_HANDLE typeClass, - unsigned *typeSize) + unsigned *typeSize, + bool forReturn) { bool result = false; unsigned size = 0; - if (type == TYP_STRUCT) + if (varTypeIsStruct(type)) { size = info.compCompHnd->getClassSize(typeClass); #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING // Account for the classification of the struct. result = IsRegisterPassable(typeClass); #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING - switch(size) + type = argOrReturnTypeForStruct(size, typeClass, forReturn); + if (type != TYP_UNKNOWN) { - case 1: - case 2: - case 4: - case 8: - result = true; - break; - - default: - break; + result = true; } #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } @@ -1147,7 +1152,7 @@ GenTreePtr Compiler::gtNewFieldRef(var_types typ, // If "obj" is the address of a local, note that a field of that struct local has been accessed. if (obj != NULL && obj->OperGet() == GT_ADDR && - obj->gtOp.gtOp1->gtType == TYP_STRUCT && // ignore "normed structs" since they won't be struct promoted + varTypeIsStruct(obj->gtOp.gtOp1) && obj->gtOp.gtOp1->OperGet() == GT_LCL_VAR) { unsigned lclNum = obj->gtOp.gtOp1->gtLclVarCommon.gtLclNum; @@ -1539,18 +1544,6 @@ bool GenTree::gtOverflowEx() const return false; } -/*****************************************************************************/ - -#ifdef DEBUG - -inline -bool Compiler::gtDblWasInt(GenTree * tree) const -{ - return (tree->IsLocal() && lvaTable[tree->gtLclVarCommon.gtLclNum].lvDblWasInt); -} - -#endif - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -1789,7 +1782,7 @@ void LclVarDsc::decRefCnts(BasicBlock::weight_t weight, Compiler * comp { /* Decrement lvRefCnt and lvRefCntWtd */ Compiler::lvaPromotionType promotionType = DUMMY_INIT(Compiler::PROMOTION_TYPE_NONE); - if (lvType == TYP_STRUCT) + if (varTypeIsStruct(lvType)) { promotionType = comp->lvaGetPromotionType(this); } @@ -1829,7 +1822,7 @@ void LclVarDsc::decRefCnts(BasicBlock::weight_t weight, Compiler * comp } } - if (lvType == TYP_STRUCT && propagate) + if (varTypeIsStruct(lvType) && propagate) { // For promoted struct locals, decrement lvRefCnt on its field locals as well. 
if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT || @@ -1878,7 +1871,7 @@ inline void LclVarDsc::incRefCnts(BasicBlock::weight_t weight, Compiler *comp, bool propagate) { Compiler::lvaPromotionType promotionType = DUMMY_INIT(Compiler::PROMOTION_TYPE_NONE); - if (lvType == TYP_STRUCT) + if (varTypeIsStruct(lvType)) { promotionType = comp->lvaGetPromotionType(this); } @@ -1921,7 +1914,7 @@ void LclVarDsc::incRefCnts(BasicBlock::weight_t weight, Compiler *com } } - if (lvType == TYP_STRUCT && propagate) + if (varTypeIsStruct(lvType) && propagate) { // For promoted struct locals, increment lvRefCnt on its field locals as well. if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT || @@ -2122,7 +2115,7 @@ VARSET_VALRET_TP Compiler::lvaStmtLclMask(GenTreePtr stmt) /***************************************************************************** * Returns true if the lvType is a TYP_REF or a TYP_BYREF. - * When the lvType is TYP_STRUCT it searches the GC layout + * When the lvType is a TYP_STRUCT it searches the GC layout * of the struct and returns true iff it contains a GC ref. */ @@ -2829,9 +2822,9 @@ bool Compiler::fgIsThrowHlpBlk(BasicBlock * block) if (!((call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL)) || (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) || -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) || -#endif +#endif // COR_JIT_EE_VERSION (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_OVERFLOW)))) return false; @@ -2846,11 +2839,11 @@ bool Compiler::fgIsThrowHlpBlk(BasicBlock * block) return add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 || add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN -#endif //!RYUJIT_CTPBUILD - ; +#endif //COR_JIT_EE_VERSION + ; } } @@ -2875,16 +2868,13 @@ unsigned Compiler::fgThrowHlpBlkStkLevel(BasicBlock *block) bool cond = (add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 || add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN -#endif //!RYUJIT_CTPBUILD - ); +#endif //COR_JIT_EE_VERSION + ); assert(cond); - - assert(add->acdKind == SCK_RNGCHK_FAIL || - add->acdKind == SCK_DIV_BY_ZERO || - add->acdKind == SCK_OVERFLOW); + // TODO: bbTgtStkDepth is DEBUG-only. // Should we use it regularly and avoid this search. 
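Several checks in this file change from lvType == TYP_STRUCT to varTypeIsStruct(lvType), as in the ref-count code above. The presumed reason is that SIMD locals now carry TYP_SIMD8/12/16/32 rather than TYP_STRUCT yet still need the struct handling paths (promotion queries, field ref-count propagation, and so on); a sketch of the widened predicate under that assumption:

    #include <cstdio>

    enum TypeSketch { T_INT, T_REF, T_STRUCT, T_SIMD8, T_SIMD12, T_SIMD16, T_SIMD32 };

    // Presumed shape of varTypeIsStruct: TYP_STRUCT plus all the SIMD struct types.
    static bool typeIsStructSketch(TypeSketch t)
    {
        return (t == T_STRUCT) || (t == T_SIMD8) || (t == T_SIMD12) ||
               (t == T_SIMD16) || (t == T_SIMD32);
    }

    int main()
    {
        printf("%d\n", typeIsStructSketch(T_SIMD16) ? 1 : 0);  // prints 1
        return 0;
    }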
assert(block->bbTgtStkDepth == add->acdStkLvl); @@ -3441,17 +3431,17 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ inline -void Compiler::optAssertionReset(unsigned limit) +void Compiler::optAssertionReset(AssertionIndex limit) { - PREFAST_ASSUME(optAssertionCount <= MAX_ASSERTION_CNT); + PREFAST_ASSUME(optAssertionCount <= optMaxAssertionCount); while (optAssertionCount > limit) { - unsigned index = optAssertionCount--; + AssertionIndex index = optAssertionCount--; AssertionDsc* curAssertion = optGetAssertion(index); unsigned lclNum = curAssertion->op1.lcl.lclNum; assert(lclNum < lvaTableCnt); - lvaTable[lclNum].lvAssertionDep &= ~optGetAssertionBit(index); + BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1); // // Find the Copy assertions @@ -3464,15 +3454,15 @@ void Compiler::optAssertionReset(unsigned limit) // op2.lcl.lclNum no longer depends upon this assertion // lclNum = curAssertion->op2.lcl.lclNum; - lvaTable[lclNum].lvAssertionDep &= ~optGetAssertionBit(index); + BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1); } } while (optAssertionCount < limit) { - unsigned index = ++optAssertionCount; + AssertionIndex index = ++optAssertionCount; AssertionDsc* curAssertion = optGetAssertion(index); unsigned lclNum = curAssertion->op1.lcl.lclNum; - lvaTable[lclNum].lvAssertionDep |= optGetAssertionBit(index); + BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), index - 1); // // Check for Copy assertions @@ -3485,7 +3475,7 @@ void Compiler::optAssertionReset(unsigned limit) // op2.lcl.lclNum now depends upon this assertion // lclNum = curAssertion->op2.lcl.lclNum; - lvaTable[lclNum].lvAssertionDep |= optGetAssertionBit(index); + BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), index - 1); } } } @@ -3497,27 +3487,27 @@ void Compiler::optAssertionReset(unsigned limit) */ inline -void Compiler::optAssertionRemove(unsigned index) +void Compiler::optAssertionRemove(AssertionIndex index) { assert(index > 0); assert(index <= optAssertionCount); - PREFAST_ASSUME(optAssertionCount <= MAX_ASSERTION_CNT); + PREFAST_ASSUME(optAssertionCount <= optMaxAssertionCount); AssertionDsc* curAssertion = optGetAssertion(index); // Two cases to consider if (index == optAssertionCount) then the last // entry in the table is to be removed and that happens automatically when - // optAssertionCount is decremented and we can just clear the lvAssertionDep bits + // optAssertionCount is decremented and we can just clear the optAssertionDep bits // The other case is when index < optAssertionCount and here we overwrite the // index-th entry in the table with the data found at the end of the table - // Since we are reordering the rable the lvAssertionDep bits need to be recreated + // Since we are reordering the rable the optAssertionDep bits need to be recreated // using optAssertionReset(0) and optAssertionReset(newAssertionCount) will - // correctly update the lvAssertionDep bits + // correctly update the optAssertionDep bits // if (index == optAssertionCount) { unsigned lclNum = curAssertion->op1.lcl.lclNum; - lvaTable[lclNum].lvAssertionDep &= ~optGetAssertionBit(index); + BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1); // // Check for Copy assertions @@ -3530,15 +3520,15 @@ void Compiler::optAssertionRemove(unsigned index) // op2.lcl.lclNum no longer depends upon this assertion // lclNum = curAssertion->op2.lcl.lclNum; - lvaTable[lclNum].lvAssertionDep &= ~optGetAssertionBit(index); + 
BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1); } optAssertionCount--; } else { - AssertionDsc* lastAssertion = optGetAssertion(optAssertionCount); - unsigned newAssertionCount = optAssertionCount-1; + AssertionDsc* lastAssertion = optGetAssertion(optAssertionCount); + AssertionIndex newAssertionCount = optAssertionCount-1; optAssertionReset(0); // This make optAssertionCount equal 0 @@ -4448,9 +4438,9 @@ bool Compiler::lvaIsFieldOfDependentlyPromotedStruct (const LclVarDsc * varD // Returns true if the variable should be reported as tracked in the GC info. // // Notes: -// This never returns true for TYP_STRUCT variables, even if they are tracked. -// This is because TYP_STRUCT variables are never tracked as a whole for GC purposes. -// It is up to the caller to ensure that the fields of TYP_STRUCT variables are +// This never returns true for struct variables, even if they are tracked. +// This is because struct variables are never tracked as a whole for GC purposes. +// It is up to the caller to ensure that the fields of struct variables are // correctly tracked. // On Amd64, we never GC-track fields of dependently promoted structs, even // though they may be tracked for optimization purposes. @@ -4482,7 +4472,39 @@ inline void Compiler::EndPhase(Phases phase) #if defined(FEATURE_JIT_METHOD_PERF) if (pCompJitTimer != NULL) pCompJitTimer->EndPhase(phase); #endif +#if DUMP_FLOWGRAPHS + fgDumpFlowGraph(phase); +#endif // DUMP_FLOWGRAPHS previousCompletedPhase = phase; +#ifdef DEBUG + if (dumpIR) + { + if ((*dumpIRPhase == L'*') + || (wcscmp(dumpIRPhase, PhaseShortNames[phase]) == 0)) + { + printf("\n"); + printf("IR after %s (switch: %ls)\n", PhaseEnums[phase], PhaseShortNames[phase]); + printf("\n"); + + if (dumpIRLinear) + { + dFuncIR(); + } + else if (dumpIRTrees) + { + dTrees(); + } + + // If we are just dumping a single method and we have a request to exit + // after dumping, do so now. + + if (dumpIRExit && ((*dumpIRPhase != L'*') || (phase == PHASE_EMIT_GCEH))) + { + exit(0); + } + } + } +#endif } /*****************************************************************************/ @@ -4664,15 +4686,16 @@ inline bool BasicBlock::endsWithJmpMethod(Compiler *comp) { if (comp->compJmpOpUsed && (bbJumpKind == BBJ_RETURN) && (bbFlags & BBF_HAS_JMP)) { - GenTreePtr last = bbTreeList->gtPrev; + GenTreePtr last = comp->fgGetLastTopLevelStmt(this); assert(last != nullptr); - return last->gtStmt.gtStmtExpr->gtOper == GT_JMP; } return false; } +#if FEATURE_FASTTAILCALL + // Returns true if the basic block ends with either // i) GT_JMP or // ii) tail call (implicit or explicit) @@ -4680,45 +4703,119 @@ inline bool BasicBlock::endsWithJmpMethod(Compiler *comp) // Params: // comp - Compiler instance // fastTailCallsOnly - Only consider fast tail calls excluding tail calls via helper. -inline bool BasicBlock::endsWithTailCallOrJmp(Compiler *comp, +inline bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /*=false*/) { - // Is this jmp tail call? - if (endsWithJmpMethod(comp)) - { - return true; - } -#ifndef LEGACY_BACKEND + GenTreePtr tailCall = nullptr; + bool tailCallsConvertibleToLoopOnly = false; + return endsWithJmpMethod(comp) || endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, &tailCall); +} + +//------------------------------------------------------------------------------ +// endsWithTailCall : Check if the block ends with a tail call. 
+// +// Arguments: +// comp - compiler instance +// fastTailCallsOnly - check for fast tail calls only +// tailCallsConvertibleToLoopOnly - check for tail calls convertible to loop only +// tailCall - a pointer to a tree that will be set to the call tree if the block +// ends with a tail call and will be set to nullptr otherwise. +// +// Return Value: +// true if the block ends with a tail call; false otherwise. +// +// Notes: +// At most one of fastTailCallsOnly and tailCallsConvertibleToLoopOnly flags can be true. + +inline bool BasicBlock::endsWithTailCall(Compiler* comp, bool fastTailCallsOnly, bool tailCallsConvertibleToLoopOnly, GenTree** tailCall) +{ + assert(!fastTailCallsOnly || !tailCallsConvertibleToLoopOnly); + *tailCall = nullptr; + bool result = false; + // Is this a tail call? // The reason for keeping this under RyuJIT is so as not to impact existing Jit32 x86 and arm // targets. - else if (comp->compTailCallUsed) + if (comp->compTailCallUsed) { - bool result; - if (fastTailCallsOnly) + if (fastTailCallsOnly || tailCallsConvertibleToLoopOnly) { - // Only fast tail calls + // Only fast tail calls or only tail calls convertible to loops result = (bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN); } else { - // Both fast tail calls and tails calls dispatched via helper + // Fast tail calls, tail calls convertible to loops, and tails calls dispatched via helper result = (bbJumpKind == BBJ_THROW) || ((bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN)); } - if (result) { - GenTreePtr last = bbTreeList->gtPrev; + GenTreePtr last = comp->fgGetLastTopLevelStmt(this); assert(last != nullptr); last = last->gtStmt.gtStmtExpr; + if (last->OperGet() == GT_CALL) + { + GenTreeCall* call = last->AsCall(); + if (tailCallsConvertibleToLoopOnly) + { + result = call->IsTailCallConvertibleToLoop(); + } + else if (fastTailCallsOnly) + { + result = call->IsFastTailCall(); + } + else + { + result = call->IsTailCall(); + } - return (last->OperGet() == GT_CALL) && (fastTailCallsOnly ? last->AsCall()->IsFastTailCall() : last->AsCall()->IsTailCall()); + if (result) + { + *tailCall = call; + } + } + else + { + result = false; + } } } -#endif - return false; + return result; +} + +//------------------------------------------------------------------------------ +// endsWithTailCallConvertibleToLoop : Check if the block ends with a tail call convertible to loop. +// +// Arguments: +// comp - compiler instance +// tailCall - a pointer to a tree that will be set to the call tree if the block +// ends with a tail call convertible to loop and will be set to nullptr otherwise. +// +// Return Value: +// true if the block ends with a tail call convertible to loop. + +inline bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall) +{ + bool fastTailCallsOnly = false; + bool tailCallsConvertibleToLoopOnly = true; + return endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, tailCall); +} + +#endif // FEATURE_FASTTAILCALL + +// Returns the last top level stmt of a given basic block. +// Returns nullptr if the block is empty. +inline GenTreePtr Compiler::fgGetLastTopLevelStmt(BasicBlock *block) +{ + // Return if the block is empty + if (block->bbTreeList == nullptr) + { + return nullptr; + } + + return fgFindTopLevelStmtBackwards(block->bbTreeList->gtPrev->AsStmt()); } // Creates an InitBlk or CpBlk node. 
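For orientation, a sketch of how the new queries above might be used by a caller. The helper name and dump message are hypothetical and not part of this change; the block iteration and the endsWithTailCallConvertibleToLoop() signature follow the code shown above.

    // Sketch: find blocks whose final call could be rewritten as a loop back to the method entry.
    void Compiler::fgSketchFindLoopTailCalls() // hypothetical helper, for illustration only
    {
        for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
        {
            GenTree* tailCall = nullptr;
            if (block->endsWithTailCallConvertibleToLoop(this, &tailCall))
            {
                // 'tailCall' is the GT_CALL for which IsTailCallConvertibleToLoop() holds; a later
                // phase could replace it with argument stores plus a jump back to the method entry.
                JITDUMP("BB%02u ends in a tail call convertible to a loop\n", block->bbNum);
            }
        }
    }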
@@ -4749,15 +4846,7 @@ inline GenTreeBlkOp* Compiler::gtCloneCpObjNode(GenTreeCpObj* source) inline static bool StructHasOverlappingFields(DWORD attribs) { -#ifdef RYUJIT_CTPBUILD - // For older RYUJIT_CTPBUILD fix: - // We use the slightly pessimistic flag CORINFO_FLG_CUSTOMLAYOUT - // This will always be set when we have any custom layout - // - return ((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0); -#else return ((attribs & CORINFO_FLG_OVERLAPPING_FIELDS) != 0); -#endif } inline static bool StructHasCustomLayout(DWORD attribs) diff --git a/src/jit/compphases.h b/src/jit/compphases.h index ffb7cc0967..b520ef2377 100644 --- a/src/jit/compphases.h +++ b/src/jit/compphases.h @@ -19,72 +19,73 @@ // (We should never do EndPhase on a phase that has children, only on 'leaf phases.') // "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase. -CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", false, -1) -CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", false, -1) -CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", false, -1) -CompPhaseNameMacro(PHASE_MORPH, "Morph", false, -1) -CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", false, -1) -CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", false, -1) -CompPhaseNameMacro(PHASE_MARK_GC_POLL_BLOCKS, "Mark GC poll blocks", false, -1) -CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1)", false, -1) +CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1) +CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1) +CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1) +CompPhaseNameMacro(PHASE_MORPH, "Morph", "MORPH", false, -1) +CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", "GS-COOK", false, -1) +CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1) +CompPhaseNameMacro(PHASE_MARK_GC_POLL_BLOCKS, "Mark GC poll blocks", "GC-POLL", false, -1) +CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1)", "EDG-WGT", false, -1) #if FEATURE_EH_FUNCLETS -CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", false, -1) +CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", "EH-FUNC", false, -1) #endif // FEATURE_EH_FUNCLETS -CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", false, -1) -CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", false, -1) -CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", false, -1) -CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", false, -1) -CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", false, -1) -CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", false, -1) -CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", false, -1) -CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", false, -1) -CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", false, -1) -CompPhaseNameMacro(PHASE_BUILD_SSA, "Build SSA representation", true, -1) -CompPhaseNameMacro(PHASE_BUILD_SSA_TOPOSORT, "SSA: topological sort", false, PHASE_BUILD_SSA) -CompPhaseNameMacro(PHASE_BUILD_SSA_DOMS, "SSA: Doms1", false, PHASE_BUILD_SSA) -CompPhaseNameMacro(PHASE_BUILD_SSA_LIVENESS, "SSA: liveness", false, PHASE_BUILD_SSA) -CompPhaseNameMacro(PHASE_BUILD_SSA_IDF, "SSA: IDF", false, PHASE_BUILD_SSA) -CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS, "SSA: insert phis", false, PHASE_BUILD_SSA) -CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME, "SSA: rename", false, PHASE_BUILD_SSA) +CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", 
"LAYOUT", false, -1) +CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", "LOOP-OPT", false, -1) +CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1) +CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1) +CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1) +CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", "MARK-LCL", false, -1) +CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", "OPT-BOOL", false, -1) +CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", "OPER-ORD", false, -1) +CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", "BLK-ORD", false, -1) +CompPhaseNameMacro(PHASE_BUILD_SSA, "Build SSA representation", "SSA", true, -1) +CompPhaseNameMacro(PHASE_BUILD_SSA_TOPOSORT, "SSA: topological sort", "SSA-SORT", false, PHASE_BUILD_SSA) +CompPhaseNameMacro(PHASE_BUILD_SSA_DOMS, "SSA: Doms1", "SSA-DOMS", false, PHASE_BUILD_SSA) +CompPhaseNameMacro(PHASE_BUILD_SSA_LIVENESS, "SSA: liveness", "SSA-LIVE", false, PHASE_BUILD_SSA) +CompPhaseNameMacro(PHASE_BUILD_SSA_IDF, "SSA: IDF", "SSA-IDF", false, PHASE_BUILD_SSA) +CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS, "SSA: insert phis", "SSA-PHI", false, PHASE_BUILD_SSA) +CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME, "SSA: rename", "SSA-REN", false, PHASE_BUILD_SSA) -CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", false, -1) +CompPhaseNameMacro(PHASE_EARLY_PROP, "Early Value Propagation", "ERL-PROP", false, -1) +CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", "VAL-NUM", false, -1) -CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", false, -1) +CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", "OPT-CHK", false, -1) #if FEATURE_VALNUM_CSE -CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", false, -1) +CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", "OPT-CSE", false, -1) #endif -CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", false, -1) +CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", "CP-PROP", false, -1) #if ASSERTION_PROP -CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", false, -1) +CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", "AST-PROP", false, -1) #endif -CompPhaseNameMacro(PHASE_UPDATE_FLOW_GRAPH, "Update flow graph", false, -1) -CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2)", false, -1) -CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", false, -1) -CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", false, -1) -CompPhaseNameMacro(PHASE_SIMPLE_LOWERING, "Do 'simple' lowering", false, -1) +CompPhaseNameMacro(PHASE_UPDATE_FLOW_GRAPH, "Update flow graph", "UPD-FG", false, -1) +CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2)", "EDG-WGT2", false, -1) +CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", "COLD-BLK", false, -1) +CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", "RAT", false, -1) +CompPhaseNameMacro(PHASE_SIMPLE_LOWERING, "Do 'simple' lowering", "SMP-LWR", false, -1) -CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", true, -1) -CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", false, PHASE_LCLVARLIVENESS) -CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK,"Per block local var liveness", false, PHASE_LCLVARLIVENESS) -CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INTERBLOCK, "Global local var 
liveness", false, PHASE_LCLVARLIVENESS) +CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", "LIVENESS", true, -1) +CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", "LIV-INIT", false, PHASE_LCLVARLIVENESS) +CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK,"Per block local var liveness", "LIV-BLK", false, PHASE_LCLVARLIVENESS) +CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INTERBLOCK, "Global local var liveness", "LIV-GLBL", false, PHASE_LCLVARLIVENESS) -CompPhaseNameMacro(PHASE_LVA_ADJUST_REF_COUNTS, "LVA adjust ref counts", false, -1) +CompPhaseNameMacro(PHASE_LVA_ADJUST_REF_COUNTS, "LVA adjust ref counts", "REF-CNT", false, -1) #ifdef LEGACY_BACKEND -CompPhaseNameMacro(PHASE_RA_ASSIGN_VARS, "RA assign vars", false, -1) +CompPhaseNameMacro(PHASE_RA_ASSIGN_VARS, "RA assign vars", "REGALLOC", false, -1) #endif // LEGACY_BACKEND -CompPhaseNameMacro(PHASE_LOWERING_DECOMP, "Lowering decomposition", false, -1) -CompPhaseNameMacro(PHASE_LOWERING, "Lowering nodeinfo", false, -1) +CompPhaseNameMacro(PHASE_LOWERING_DECOMP, "Lowering decomposition", "LWR-DEC", false, -1) +CompPhaseNameMacro(PHASE_LOWERING, "Lowering nodeinfo", "LWR-INFO", false, -1) #ifndef LEGACY_BACKEND -CompPhaseNameMacro(PHASE_LINEAR_SCAN, "Linear scan register alloc", true, -1) -CompPhaseNameMacro(PHASE_LINEAR_SCAN_BUILD, "LSRA build intervals", false, PHASE_LINEAR_SCAN) -CompPhaseNameMacro(PHASE_LINEAR_SCAN_ALLOC, "LSRA allocate", false, PHASE_LINEAR_SCAN) -CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve", false, PHASE_LINEAR_SCAN) +CompPhaseNameMacro(PHASE_LINEAR_SCAN, "Linear scan register alloc", "LSRA", true, -1) +CompPhaseNameMacro(PHASE_LINEAR_SCAN_BUILD, "LSRA build intervals", "LSRA-BLD", false, PHASE_LINEAR_SCAN) +CompPhaseNameMacro(PHASE_LINEAR_SCAN_ALLOC, "LSRA allocate", "LSRA-ALL", false, PHASE_LINEAR_SCAN) +CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve", "LSRA-RES", false, PHASE_LINEAR_SCAN) #endif // !LEGACY_BACKEND -CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", false, -1) -CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", false, -1) -CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", false, -1) +CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1) +CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1) +CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1) #undef CompPhaseNameMacro diff --git a/src/jit/conventions.txt b/src/jit/conventions.txt new file mode 100644 index 0000000000..2984ed6043 --- /dev/null +++ b/src/jit/conventions.txt @@ -0,0 +1,81 @@ +This file contains an extracted, plain-text version of some of the "CLR JIT +Coding Conventions" document, that can be used as a template when writing new +comments in the JIT source code. The definitive coding conventions document is +located here: + +https://github.com/dotnet/coreclr/blob/master/Documentation/coding-guidelines/clr-jit-coding-conventions.md + + +********** Section 7.1.5 TODO comments + +This is the format to be used: + +// TODO[-Arch][-Platform][-CQ|-Throughput|-Cleanup|-Bug|-Bug?]: description of the issue + +-- One type modifier (CQ, Throughput, Cleanup, Bug or Bug?) must be specified. +-- The -Arch and -Platform modifiers are optional, and should generally specify +actual architectures in all-caps (e.g. AMD64, X86, ARM, ARM64), and then in +Pascal casing for Platforms and architecture classes (e.g. ARMArch, LdStArch, XArch, Unix, Windows). +-- This list is not intended to be exhaustive. 
+ +Examples: + + // TODO-LdStArch-Bug: Should regTmp be a dst on the node or an internal reg? + // Either way, it is not currently being handled by Lowering. + + // TODO-CQ: based on whether src type is aligned use movaps instead. + + // TODO-Cleanup: Add a comment about why this is unreached() for RyuJIT backend. + + // TODO-Arm64-Bug: handle large constants! Probably need something like the ARM + // case above: if (arm_Valid_Imm_For_Instr(ins, val)) ... + + +********** Section 9.4 Function header comment + +All functions, except trivial accessors and wrappers, should have a function +header comment which describes the behavior and the implementation details of +the function. The format of the function header in an implementation file is +as shown below. + +Within the comment, argument names (and other program-related names) should be +surrounded by double quotes, to emphasize that they are program objects, and +not simple English words. This helps clarify those cases where a function +argument might be parsed (by a human) in either way. + +Any of the sections that do not apply to a method may be skipped. For example, +if a method has no arguments, the "Arguments" section can be omitted. If a +function is a void return function, the "Return Value" section can be omitted. + +If you can formulate any assumptions as asserts in the code itself, you should +do so. The "Assumptions" section is intended to encapsulate things that are +harder (or impossible) to formulate as asserts, or to provide a place to write +a more easily read English description of any assumptions that exist, even if +they can be written with asserts. + + +//------------------------------------------------------------------------ +// <Function name>: <Short description of the function> +// +// <Full description of the function> +// +// Arguments: +// <argument1-name> - Description of argument 1 +// <argument2-name> - Description of argument 2 +// ... one line for each function argument +// +// Return Value: +// Description of the values this function could return +// and under what conditions. When the return value is a +// described as a function of the arguments, those arguments +// should be mentioned specifically by name. +// +// Assumptions: +// Any entry and exit conditions, such as required preconditions of +// data structures, memory to be freed by caller, etc. +// +// Notes: +// More detailed notes about the function. +// What errors can the function return? +// What other methods are related or alternatives to be considered? + diff --git a/src/jit/copyprop.cpp b/src/jit/copyprop.cpp index a4c56a1c46..db797a04c2 100644 --- a/src/jit/copyprop.cpp +++ b/src/jit/copyprop.cpp @@ -407,6 +407,7 @@ void Compiler::optVnCopyProp() // Compute the domTree to use. BlkToBlkSetMap* domTree = new (getAllocator()) BlkToBlkSetMap(getAllocator()); + domTree->Reallocate(fgBBcount * 3 / 2); // Prime the allocation SsaBuilder::ComputeDominators(this, domTree); struct BlockWork diff --git a/src/jit/dataflow.h b/src/jit/dataflow.h index 29b9ec73b0..2adce0dbae 100644 --- a/src/jit/dataflow.h +++ b/src/jit/dataflow.h @@ -26,13 +26,6 @@ private: DataFlow(); public: - // Used to ask the dataflow object to restart analysis. - enum UpdateResult - { - RestartAnalysis, - ContinueAnalysis - }; - // The callback interface that needs to be implemented by anyone // needing updates by the dataflow object. 
class Callback @@ -42,9 +35,7 @@ public: void StartMerge(BasicBlock* block); void Merge(BasicBlock* block, BasicBlock* pred, flowList* preds); - void EndMerge(BasicBlock* block); - bool Changed(BasicBlock* block); - DataFlow::UpdateResult Update(BasicBlock* block); + bool EndMerge(BasicBlock* block); private: Compiler* m_pCompiler; @@ -78,14 +69,9 @@ void DataFlow::ForwardAnalysis(TCallback& callback) callback.Merge(block, pred->flBlock, preds); } } - callback.EndMerge(block); - if (callback.Changed(block)) + if (callback.EndMerge(block)) { - UpdateResult result = callback.Update(block); - - assert(result == DataFlow::ContinueAnalysis); - AllSuccessorIter succsBegin = block->GetAllSuccs(m_pCompiler).begin(); AllSuccessorIter succsEnd = block->GetAllSuccs(m_pCompiler).end(); for (AllSuccessorIter succ = succsBegin; succ != succsEnd; ++succ) diff --git a/src/jit/dll/altjit.def b/src/jit/dll/altjit.def deleted file mode 100644 index ec1a3c7a75..0000000000 --- a/src/jit/dll/altjit.def +++ /dev/null @@ -1,8 +0,0 @@ -; ==++== -; -; Copyright (c) Microsoft Corporation. All rights reserved. -; -; ==--== -EXPORTS - getJit - sxsJitStartup
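To make the revised DataFlow callback contract concrete, here is a minimal sketch of a client written against the new interface shown above. The class is hypothetical and not part of this commit; the point is that EndMerge() now reports whether the block's merged state changed, replacing the old Changed()/Update() pair, and ForwardAnalysis() only walks a block's successors when it returns true.

    class SketchDataFlowCallback
    {
    public:
        SketchDataFlowCallback(Compiler* comp) : m_pCompiler(comp) {}

        void StartMerge(BasicBlock* block)
        {
            // Reset the scratch "in" state for 'block'.
        }

        void Merge(BasicBlock* block, BasicBlock* pred, flowList* preds)
        {
            // Union the predecessor's "out" state into the scratch "in" state.
        }

        bool EndMerge(BasicBlock* block)
        {
            // Commit the scratch state and return true iff it differs from the block's
            // previous state, so that the analysis revisits the block's successors.
            return false;
        }

    private:
        Compiler* m_pCompiler;
    };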
\ No newline at end of file diff --git a/src/jit/dll/jit.nativeproj b/src/jit/dll/jit.nativeproj index 011db75005..97981e7eff 100644 --- a/src/jit/dll/jit.nativeproj +++ b/src/jit/dll/jit.nativeproj @@ -30,8 +30,6 @@ <LinkModuleDefinitionFile>$(OutputName).def</LinkModuleDefinitionFile> - <ClDefines Condition="'$(BuildArchitecture)' == 'i386'">$(ClDefines);LEGACY_BACKEND</ClDefines> - <ClDefines Condition="'$(BuildArchitecture)' == 'arm'">$(ClDefines);LEGACY_BACKEND</ClDefines> <ClDefines Condition="'$(BuildArchitecture)' == 'amd64'">$(ClDefines);FEATURE_SIMD;FEATURE_AVX_SUPPORT</ClDefines> <Win32DllLibs>$(SdkLibPath)\kernel32.lib;$(SdkLibPath)\user32.lib;$(SdkLibPath)\advapi32.lib;$(SdkLibPath)\oleaut32.lib;$(SdkLibPath)\uuid.lib</Win32DllLibs> @@ -58,13 +56,6 @@ <LinkDelayLoad Condition="'$(EnableLateDisasm)' == 'true'">$(LinkDelayLoad);msvcdis$(VC_NONCRT_ProdVerX).dll</LinkDelayLoad> <UseDelayimpLib Condition="'$(EnableLateDisasm)' == 'true' and '$(FeatureMergeJitAndEngine)'!='true'">true</UseDelayimpLib> - <!-- For debugging purposes only, temporarily enable these in RET builds so GenTree debugging is easier. --> - <!-- - <ClDefines>$(ClDefines);DEBUGGABLE_GENTREE=1</ClDefines> - <ClAdditionalOptions Condition="'$(DebugBuild)' != 'true'">$(ClAdditionalOptions) /Ob0</ClAdditionalOptions> - <LinkAdditionalOptions Condition="'$(DebugBuild)' != 'true'">$(LinkAdditionalOptions) /OPT:NOICF</LinkAdditionalOptions> - --> - <!-- Disable merge of text and rdata for DevDiv:696146--> <LinkMergeRData Condition="'$(BuildArchitecture)'=='i386'">false</LinkMergeRData> </PropertyGroup> diff --git a/src/jit/earlyprop.cpp b/src/jit/earlyprop.cpp new file mode 100644 index 0000000000..0d760513f8 --- /dev/null +++ b/src/jit/earlyprop.cpp @@ -0,0 +1,433 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// +// +// Early Value Propagation +// +// This phase performs an SSA-based value propagation optimization, currently only applies to array +// lengths and runtime type handles. An SSA-based backwards tracking of local variables is performed +// at each point of interest, e.g., an array length reference site or a method table reference site. +// The tracking continues until an interesting value is encountered. The value is then used to rewrite +// the source site. +// +/////////////////////////////////////////////////////////////////////////////////////// + +#include "jitpch.h" +#include "ssabuilder.h" + + +bool Compiler::optDoEarlyPropForFunc() +{ + bool propArrayLen = (optMethodFlags & OMF_HAS_NEWARRAY) && (optMethodFlags & OMF_HAS_ARRAYREF); + bool propGetType = (optMethodFlags & OMF_HAS_NEWOBJ) && (optMethodFlags & OMF_HAS_VTABLEREF); + return propArrayLen || propGetType; +} + +bool Compiler::optDoEarlyPropForBlock(BasicBlock* block) +{ + bool bbHasArrayRef = (block->bbFlags & BBF_HAS_INDX) != 0; + bool bbHasVtableRef = (block->bbFlags & BBF_HAS_VTABREF) != 0; + return bbHasArrayRef || bbHasVtableRef; +} + +//-------------------------------------------------------------------- +// gtIsVtableRef: Return true if the tree is a method table reference. +// +// Arguments: +// tree - The input tree. +// +// Return Value: +// Return true if the tree is a method table reference. + +bool Compiler::gtIsVtableRef(GenTreePtr tree) +{ + if (tree->OperGet() == GT_IND) + { + GenTreeIndir* indir = tree->AsIndir(); + + if (!indir->HasIndex()) + { + // Check if the base is an reference pointer. 
+ if (indir->Base()->TypeGet() == TYP_REF) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------------ +// getArrayLengthFromAllocation: Return the array length for an array allocation +// helper call. +// +// Arguments: +// tree - The array allocation helper call. +// +// Return Value: +// Return the array length node. + +GenTreePtr Compiler::getArrayLengthFromAllocation(GenTreePtr tree) +{ + assert(tree != nullptr); + + if (tree->OperGet() == GT_CALL) + { + GenTreeCall* call = tree->AsCall(); + + if (call->gtCallType == CT_HELPER) + { + CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); + + if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_VC) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8)) + { + // This is an array allocation site. Grab the array length node. + return gtArgEntryByArgNum(call, 1)->node; + } + } + } + + return nullptr; +} + +//----------------------------------------------------------------------------- +// getObjectHandleNodeFromAllocation: Return the type handle for an object allocation +// helper call. +// +// Arguments: +// tree - The object allocation helper call. +// +// Return Value: +// Return the object type handle node. + +GenTreePtr Compiler::getObjectHandleNodeFromAllocation(GenTreePtr tree) +{ + assert(tree != nullptr); + + if (tree->OperGet() == GT_CALL) + { + GenTreeCall* call = tree->AsCall(); + + if (call->gtCallType == CT_HELPER) + { + CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); + + if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWFAST) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWSFAST) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWSFAST_ALIGN8) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_VC) || + call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8)) + { + // This is an object allocation site. Return the runtime type handle node. + fgArgTabEntryPtr argTabEntry = gtArgEntryByArgNum(call, 0); + return argTabEntry->node; + } + } + } + + return nullptr; +} + +//------------------------------------------------------------------------------------------ +// optEarlyProp: The entry point of the early value propagation. +// +// Notes: +// This phase performs an SSA-based value propagation, including +// 1. Array length propagation. +// 2. Runtime type handle propagation. +// +// For array length propagation, a demand-driven SSA-based backwards tracking of constant +// array lengths is performed at each array length reference site which is in form of a +// GT_ARR_LENGTH node. When a GT_ARR_LENGTH node is seen, the array ref pointer which is +// the only child node of the GT_ARR_LENGTH is tracked. This is only done for array ref +// pointers that have valid SSA forms.The tracking is along SSA use-def chain and stops +// at the original array allocation site where we can grab the array length. The +// GT_ARR_LENGTH node will then be rewritten to a GT_CNS_INT node if the array length is +// constant. +// +// Similarly, the same algorithm also applies to rewriting a method table (also known as +// vtable) reference site which is in form of GT_INDIR node. 
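A tiny worked illustration of the array length case described above (pseudo-notation with a hypothetical local V01; this is not an actual JIT dump):

    // V01 = CORINFO_HELP_NEWARR_1_VC(elemTypeHnd, 5)   // allocation site: the length argument is the constant 5
    // ...
    // GT_ARR_LENGTH(V01)                               // use site: V01 is tracked back along its SSA use-def
    //                                                  // chain to the allocation above, and the GT_ARR_LENGTH
    //                                                  // node is rewritten in place to GT_CNS_INT 5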
The base pointer, which is +// an object reference pointer, is treated in the same way as an array reference pointer. + +void Compiler::optEarlyProp() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In optEarlyProp()\n"); + } +#endif + + assert(fgSsaPassesCompleted == 1); + + if (!optDoEarlyPropForFunc()) + { + return; + } + + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) + { + if (!optDoEarlyPropForBlock(block)) + continue; + + compCurBB = block; + + for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; ) + { + // Preserve the next link before the propagation and morph. + GenTreeStmt* next = stmt->gtNextStmt; + + compCurStmt = stmt; + + // Walk the stmt tree in linear order to rewrite any array length reference with a + // constant array length. + bool isRewritten = false; + for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext) + { + if (optEarlyPropRewriteTree(tree)) + { + isRewritten = true; + } + } + + // Morph the stmt and update the evaluation order if the stmt has been rewritten. + if (isRewritten) + { + gtSetStmtInfo(stmt); + fgSetStmtSeq(stmt); + } + + stmt = next; + } + } + +#ifdef DEBUG + if (verbose) + { + JITDUMP("\nAfter optEarlyProp:\n"); + fgDispBasicBlocks(/*dumpTrees*/true); + } +#endif +} + +//---------------------------------------------------------------- +// optEarlyPropRewriteValue: Rewrite a tree to the actual value. +// +// Arguments: +// tree - The input tree node to be rewritten. +// +// Return Value: +// Return true iff "tree" is successfully rewritten. + +bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree) +{ + GenTreePtr objectRefPtr = nullptr; + optPropKind propKind = optPropKind::OPK_INVALID; + + if (tree->OperGet() == GT_ARR_LENGTH) + { + objectRefPtr = tree->gtOp.gtOp1; + propKind = optPropKind::OPK_ARRAYLEN; + } + else if (gtIsVtableRef(tree)) + { + objectRefPtr = tree->gtOp.gtOp1; + propKind = optPropKind::OPK_OBJ_GETTYPE; + } + else + { + return false; + } + + if (!objectRefPtr->OperIsScalarLocal() || + fgExcludeFromSsa(objectRefPtr->AsLclVarCommon()->GetLclNum())) + + { + return false; + } + + bool isRewritten = false; + GenTreePtr root = compCurStmt; + unsigned lclNum = objectRefPtr->AsLclVarCommon()->GetLclNum(); + unsigned ssaNum = objectRefPtr->AsLclVarCommon()->GetSsaNum(); + + GenTreePtr actualVal = optPropGetValue(lclNum, ssaNum, propKind); + + if (actualVal != nullptr) + { + if (propKind == optPropKind::OPK_ARRAYLEN) + { + assert(actualVal->IsCnsIntOrI()); + + if (actualVal->gtIntCon.gtIconVal > INT32_MAX) + { + // Don't propagate array lengths that are beyond the maximum value of a GT_ARR_LENGTH. + // node. CORINFO_HELP_NEWARR_1_OBJ helper call allows to take a long integer as the + // array length argument, but the type of GT_ARR_LENGTH is always INT32. 
+ return false; + } + } + else if (propKind == optPropKind::OPK_OBJ_GETTYPE) + { + assert(actualVal->IsCnsIntOrI()); + } + +#ifdef DEBUG + if (verbose) + { + printf("optEarlyProp Rewriting BB%02u\n", compCurBB->bbNum); + gtDispTree(root); + printf("\n"); + } +#endif + // Rewrite the tree using a copy of "actualVal" + GenTreePtr actualValCopy; + var_types origType = tree->gtType; + + if (actualVal->GetNodeSize() <= tree->GetNodeSize()) + { + actualValCopy = tree; + } + else + { + actualValCopy = gtNewLargeOperNode(GT_ADD, TYP_INT); + } + + fgWalkTreePre(&tree, Compiler::lvaDecRefCntsCB, (void*)this, true); + + actualValCopy->CopyFrom(actualVal, this); + actualValCopy->gtType = origType; + + fgWalkTreePre(&actualValCopy, Compiler::lvaIncRefCntsCB, (void*)this, true); + + if (actualValCopy != tree) + { + gtReplaceTree(root, tree, actualValCopy); + } + + isRewritten = true; + +#ifdef DEBUG + if (verbose) + { + printf("to\n"); + gtDispTree(compCurStmt); + printf("\n"); + } +#endif + } + + return isRewritten; +} + +//------------------------------------------------------------------------------------------- +// optPropGetValue: Given an SSA object ref pointer, get the value needed based on valueKind. +// +// Arguments: +// lclNum - The local var number of the ref pointer. +// ssaNum - The SSA var number of the ref pointer. +// valueKind - The kind of value of interest. +// +// Return Value: +// Return the corresponding value based on valueKind. + +GenTreePtr Compiler::optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind) +{ + return optPropGetValueRec(lclNum, ssaNum, valueKind, 0); +} + +//----------------------------------------------------------------------------------- +// optPropGetValueRec: Given an SSA object ref pointer, get the value needed based on valueKind +// within a recursion bound. +// +// Arguments: +// lclNum - The local var number of the array pointer. +// ssaNum - The SSA var number of the array pointer. +// valueKind - The kind of value of interest. +// walkDepth - Current recursive walking depth. +// +// Return Value: +// Return the corresponding value based on valueKind. + +GenTreePtr Compiler::optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropKind valueKind, int walkDepth) +{ + if (ssaNum == SsaConfig::RESERVED_SSA_NUM) + { + return nullptr; + } + + SSAName ssaName(lclNum, ssaNum); + GenTreePtr value = nullptr; + + // Bound the recursion with a hard limit. + if (walkDepth > optEarlyPropRecurBound) + { + return nullptr; + } + + // Track along the use-def chain to get the array length + GenTreePtr treelhs = lvaTable[lclNum].GetPerSsaData(ssaNum)->m_defLoc.m_tree; + + if (treelhs == nullptr) + { + // Incoming parameters or live-in variables don't have actual definition tree node + // for their FIRST_SSA_NUM. See SsaBuilder::RenameVariables. 
+ assert(ssaNum == SsaConfig::FIRST_SSA_NUM); + } + else + { + GenTreePtr *lhsPtr; + GenTreePtr treeDefParent = treelhs->gtGetParent(&lhsPtr); + + if (treeDefParent->OperGet() == GT_ASG) + { + assert(treelhs == treeDefParent->gtGetOp1()); + GenTreePtr treeRhs = treeDefParent->gtGetOp2(); + + if (treeRhs->OperIsScalarLocal() && !fgExcludeFromSsa(treeRhs->AsLclVarCommon()->GetLclNum())) + { + // Recursively track the Rhs + unsigned rhsLclNum = treeRhs->AsLclVarCommon()->GetLclNum(); + unsigned rhsSsaNum = treeRhs->AsLclVarCommon()->GetSsaNum(); + + value = optPropGetValueRec(rhsLclNum, rhsSsaNum, valueKind, walkDepth + 1); + } + else + { + if (valueKind == optPropKind::OPK_ARRAYLEN) + { + value = getArrayLengthFromAllocation(treeRhs); + if (value != nullptr) + { + if (!value->IsCnsIntOrI()) + { + // Leave out non-constant-sized array + value = nullptr; + } + } + } + else if(valueKind == optPropKind::OPK_OBJ_GETTYPE) + { + value = getObjectHandleNodeFromAllocation(treeRhs); + if (value != nullptr) + { + if (!value->IsCnsIntOrI()) + { + // Leave out non-constant-sized array + value = nullptr; + } + } + } + } + } + } + + return value; +} diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp index 4c8e2ff30e..da2a187c4c 100644 --- a/src/jit/ee_il_dll.cpp +++ b/src/jit/ee_il_dll.cpp @@ -46,6 +46,21 @@ JitOptions jitOpts = /*****************************************************************************/ +void jitStartup() +{ +#ifdef FEATURE_TRACELOGGING + JitTelemetry::NotifyDllProcessAttach(); +#endif + Compiler::compStartup(); +} + +void jitShutdown() +{ + Compiler::compShutdown(); +#ifdef FEATURE_TRACELOGGING + JitTelemetry::NotifyDllProcessDetach(); +#endif +} /***************************************************************************** * jitOnDllProcessAttach() called by DllMain() when jit.dll is loaded @@ -53,7 +68,7 @@ JitOptions jitOpts = void jitOnDllProcessAttach() { - Compiler::compStartup(); + jitStartup(); } /***************************************************************************** @@ -62,7 +77,7 @@ void jitOnDllProcessAttach() void jitOnDllProcessDetach() { - Compiler::compShutdown(); + jitShutdown(); } @@ -126,7 +141,7 @@ ICorJitCompiler* __stdcall getJit() { ILJitter = new (CILJitSingleton) CILJit(); #ifdef FEATURE_MERGE_JIT_AND_ENGINE - Compiler::compStartup(); + jitStartup(); #endif } return(ILJitter); @@ -213,6 +228,10 @@ void CILJit::ProcessShutdownWork(ICorStaticInfo* statInfo) // Continue, by shutting down this JIT as well. } +#ifdef FEATURE_MERGE_JIT_AND_ENGINE + jitShutdown(); +#endif + Compiler::ProcessShutdownWork(statInfo); } @@ -231,7 +250,6 @@ void CILJit::getVersionIdentifier(GUID* versionIdentifier) memcpy(versionIdentifier, &JITEEVersionIdentifier, sizeof(GUID)); } -#ifndef RYUJIT_CTPBUILD /***************************************************************************** * Determine the maximum length of SIMD vector supported by this JIT. 
*/ @@ -260,14 +278,11 @@ unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags) return 0; #endif // !_TARGET_AMD64_ } -#endif //!RYUJIT_CTPBUILD -#ifndef RYUJIT_CTPBUILD void CILJit::setRealJit(ICorJitCompiler* realJitCompiler) { g_realJitCompiler = realJitCompiler; } -#endif // !RYUJIT_CTPBUILD /***************************************************************************** @@ -276,11 +291,11 @@ void CILJit::setRealJit(ICorJitCompiler* realJitCompiler) unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig) { -#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) +#if defined(_TARGET_AMD64_) // Everything fits into a single 'slot' size // to accommodate irregular sized structs, they are passed byref - // TODO-ARM64-Bug?: structs <= 16 bytes get passed in 2 consecutive registers. + #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING CORINFO_CLASS_HANDLE argClass; CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass)); @@ -288,12 +303,12 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_ if (argType == TYP_STRUCT) { unsigned structSize = info.compCompHnd->getClassSize(argClass); - return structSize; + return structSize; // TODO: roundUp() needed here? } #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING return sizeof(size_t); -#else // !_TARGET_AMD64_ && !_TARGET_ARM64_ +#else // !_TARGET_AMD64_ CORINFO_CLASS_HANDLE argClass; CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass)); @@ -306,13 +321,23 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_ // make certain the EE passes us back the right thing for refanys assert(argTypeJit != CORINFO_TYPE_REFANY || structSize == 2*sizeof(void*)); - return (unsigned)roundUp(structSize, sizeof(size_t)); +#if FEATURE_MULTIREG_STRUCT_ARGS +#ifdef _TARGET_ARM64_ + if (structSize > MAX_PASS_MULTIREG_BYTES) + { + // This struct is passed by reference using a single 'slot' + return TARGET_POINTER_SIZE; + } +#endif // _TARGET_ARM64_ +#endif // FEATURE_MULTIREG_STRUCT_ARGS + + return (unsigned)roundUp(structSize, TARGET_POINTER_SIZE); } else { - unsigned argSize = sizeof(size_t) * genTypeStSz(argType); + unsigned argSize = sizeof(int) * genTypeStSz(argType); assert(0 < argSize && argSize <= sizeof(__int64)); - return argSize; + return (unsigned)roundUp(argSize, TARGET_POINTER_SIZE); } #endif } @@ -332,13 +357,8 @@ GenTreePtr Compiler::eeGetPInvokeCookie(CORINFO_SIG_INFO *szMetaSig) unsigned Compiler::eeGetArrayDataOffset(var_types type) { -#ifndef RYUJIT_CTPBUILD return varTypeIsGC(type) ? eeGetEEInfo()->offsetOfObjArrayData : offsetof(CORINFO_Array, u1Elems); -#else - return varTypeIsGC(type) ? 
offsetof(CORINFO_RefArray, refElems) - : offsetof(CORINFO_Array, u1Elems); -#endif } /*****************************************************************************/ @@ -895,6 +915,20 @@ void Compiler::eeSetEHinfo(unsigned EHnumber, } } +WORD Compiler::eeGetRelocTypeHint(void * target) +{ + if (info.compMatchedVM) + { + return info.compCompHnd->getRelocTypeHint(target); + } + else + { + // No hints + return (WORD)-1; + } +} + + CORINFO_FIELD_HANDLE Compiler::eeFindJitDataOffs(unsigned dataOffs) { // Data offsets are marked by the fact that the low two bits are 0b01 0x1 diff --git a/src/jit/ee_il_dll.hpp b/src/jit/ee_il_dll.hpp index 41026f69db..3f33f625bd 100644 --- a/src/jit/ee_il_dll.hpp +++ b/src/jit/ee_il_dll.hpp @@ -22,11 +22,9 @@ class CILJit: public ICorJitCompiler GUID* versionIdentifier /* OUT */ ); -#ifndef RYUJIT_CTPBUILD unsigned getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags); void setRealJit(ICorJitCompiler* realJitCompiler); -#endif // !RYUJIT_CTPBUILD }; /***************************************************************************** diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index e327657af5..2a5dbf8a8c 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -1414,14 +1414,19 @@ void * emitter::emitAllocInstr(size_t sz, emitAttr opsz) #if RELOC_SUPPORT - if (EA_IS_DSP_RELOC(opsz) && emitComp->opts.compReloc) + // Amd64: ip-relative addressing is supported even when not generating relocatable ngen code + if (EA_IS_DSP_RELOC(opsz) +#ifndef _TARGET_AMD64_ + && emitComp->opts.compReloc +#endif //_TARGET_AMD64_ + ) { /* Mark idInfo()->idDspReloc to remember that the */ /* address mode has a displacement that is relocatable */ id->idSetIsDspReloc(); } - if (EA_IS_CNS_RELOC(opsz) && emitComp->opts.compReloc) + if (EA_IS_CNS_RELOC(opsz) && emitComp->opts.compReloc) { /* Mark idInfo()->idCnsReloc to remember that the */ /* instruction has an immediate constant that is relocatable */ @@ -5654,6 +5659,7 @@ void emitter::emitRecordGCcall(BYTE * codePos, call->cdGCrefRegs = (regMaskSmall)emitThisGCrefRegs; call->cdByrefRegs = (regMaskSmall)emitThisByrefRegs; + #if EMIT_TRACK_STACK_DEPTH #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(FitsIn<USHORT>(emitCurStackLvl / ((unsigned)sizeof(unsigned)))); @@ -6883,7 +6889,7 @@ void emitter::emitRecordCallSite(ULONG instrOffset, /* I CORINFO_SIG_INFO* callSig, /* IN */ CORINFO_METHOD_HANDLE methodHandle) /* IN */ { -#if defined(DEBUG) && !defined(RYUJIT_CTPBUILD) +#if defined(DEBUG) // Since CORINFO_SIG_INFO is a heavyweight structure, in most cases we can // lazily obtain it here using the given method handle (we only save the sig // info when we explicitly need it, i.e. 
for CALLI calls, vararg calls, and @@ -6905,7 +6911,7 @@ void emitter::emitRecordCallSite(ULONG instrOffset, /* I } emitCmpHandle->recordCallSite(instrOffset, callSig, methodHandle); -#endif // defined(DEBUG) && !defined(RYUJIT_CTPBUILD) +#endif // defined(DEBUG) } /*****************************************************************************/ diff --git a/src/jit/emit.h b/src/jit/emit.h index fa53f2c5cc..baf3bb387e 100644 --- a/src/jit/emit.h +++ b/src/jit/emit.h @@ -1485,6 +1485,7 @@ private: UNATIVE_OFFSET emitInstCodeSz(instrDesc *id); #ifndef LEGACY_BACKEND + CORINFO_FIELD_HANDLE emitLiteralConst(ssize_t cnsValIn, emitAttr attr = EA_8BYTE); CORINFO_FIELD_HANDLE emitFltOrDblConst(GenTreeDblCon *tree, emitAttr attr = EA_UNKNOWN); regNumber emitInsBinary (instruction ins, emitAttr attr, GenTree* dst, GenTree* src); regNumber emitInsTernary (instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2); diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp index 06e4e73d07..1efe6c913b 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -431,7 +431,7 @@ void emitter::emitInsSanityCheck(instrDesc *id) assert(emitGetInsSC(id) <= 4); if (insOptsLSL(id->idInsOpt())) { - assert(emitGetInsSC(id) > 0); + assert((emitGetInsSC(id) > 0) || (id->idReg2() == REG_ZR)); // REG_ZR encodes SP and we allow a shift of zero } break; @@ -875,6 +875,35 @@ bool emitter::emitInsMayWriteMultipleRegs(instrDesc *id) } } +// For the small loads/store instruction we adjust the size 'attr' +// depending upon whether we have a load or a store +// +emitAttr emitter::emitInsAdjustLoadStoreAttr(instruction ins, emitAttr attr) +{ + if (EA_SIZE(attr) <= EA_4BYTE) + { + if (emitInsIsLoad(ins)) + { + // The value of 'ins' encodes the size to load + // we use EA_8BYTE here because it is the size we will write (into dataReg) + // it is also required when ins is INS_ldrsw + // + attr = EA_8BYTE; + } + else + { + assert(emitInsIsStore(ins)); + + // The value of 'ins' encodes the size to store + // we use EA_4BYTE here because it is the size of the register + // that we want to display when storing small values + // + attr = EA_4BYTE; + } + } + return attr; +} + // Takes an instrDesc 'id' and uses the instruction 'ins' to determine the // size of the target register that is written or read by the instruction. 
// Note that even if EA_4BYTE is returned a load instruction will still @@ -1975,6 +2004,28 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) return false; } +// true if this 'imm' can be encoded as the offset in a ldr/str instruction +/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr) +{ + if (imm == 0) + return true; // Encodable using IF_LS_2A + + if ((imm >= -256) && (imm <= 255)) + return true; // Encodable using IF_LS_2C (or possibly IF_LS_2B) + + if (imm < 0) + return false; // not encodable + + emitAttr size = EA_SIZE(attr); + unsigned scale = NaturalScale_helper(size); + ssize_t mask = size - 1; // the mask of low bits that must be zero to encode the immediate + + if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) + return true; // Encodable using IF_LS_2B + + return false; // not encodable +} + /************************************************************************ * * A helper method to return the natural scale for an EA 'size' @@ -2171,6 +2222,11 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) imm = -imm; // convert to unsigned } + if (imm < 0) + { + return false; // Must be MIN_INT64 + } + if ((imm & 0xfff) != 0) // Now the low 12 bits all have to be zero { return false; @@ -4706,6 +4762,60 @@ void emitter::emitIns_R_R_I(instruction ins, appendToCurIG(id); } +/***************************************************************************** +* +* Add an instruction referencing two registers and a constant. +* Also checks for a large immediate that needs a second instruction +* and will load it in reg1 +* +* - Supports instructions: add, adds, sub, subs, and, ands, eor and orr +* - Requires that reg1 is a general register and not SP or ZR +* - Requires that reg1 != reg2 +*/ +void emitter::emitIns_R_R_Imm(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm) +{ + assert(isGeneralRegister(reg1)); + assert(reg1 != reg2); + + bool immFits = true; + + switch (ins) + { + case INS_add: + case INS_adds: + case INS_sub: + case INS_subs: + immFits = emitter::emitIns_valid_imm_for_add(imm, attr); + break; + + case INS_ands: + case INS_and: + case INS_eor: + case INS_orr: + immFits = emitter::emitIns_valid_imm_for_alu(imm, attr); + break; + + default: + assert(!"Unsupported instruction in emitIns_R_R_Imm"); + } + + if (immFits) + { + emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // Load 'imm' into the reg1 register + // then issue: 'ins' reg1, reg2, reg1 + // + codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm); + emitIns_R_R_R(ins, attr, reg1, reg2, reg1); + } +} /***************************************************************************** * @@ -5211,6 +5321,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, } else if (isAddSub) { + bool reg2IsSP = (reg2 == REG_SP); assert(!isLdSt); assert(isValidGeneralDatasize(size)); assert(isGeneralRegister(reg3)); @@ -5251,7 +5362,17 @@ void emitter::emitIns_R_R_R_I(instruction ins, { assert(insOptsNone(opt)); - fmt = IF_DR_3A; + if (reg2IsSP) + { + // To encode the SP register as reg2 we must use the IF_DR_3C encoding + // and also specify a LSL of zero (imm == 0) + opt = INS_OPTS_LSL; + fmt = IF_DR_3C; + } + else + { + fmt = IF_DR_3A; + } } else { @@ -10430,161 +10551,151 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool #endif // DEBUG -// this is very similar to emitInsBinary and probably could be folded in to same -// except the requirements on the incoming parameter are different, -// ex: the memory op 
in storeind case must NOT be contained -void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) +// Generate code for a load or store operation with a potentially complex addressing mode +// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*sccale + offset] +// Since Arm64 does not directly support this complex of an addressing mode +// we may generates up to three instructions for this for Arm64 +// +void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) { - switch (node->OperGet()) + emitAttr ldstAttr = isVectorRegister(dataReg) ? attr : emitInsAdjustLoadStoreAttr(ins, attr); + + GenTree* addr = indir->Addr(); + + if (addr->isContained()) { - case GT_IND: - { - assert(emitInsIsLoad(ins)); + assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA); - GenTreeIndir* indir = node->AsIndir(); - GenTree* addr = node->gtGetOp1(); + int offset = 0; + DWORD lsl = 0; - // The value of 'ins' encodes the size to load - // we use EA_8BYTE here because it is the size we want to write (into node->gtRegNum) - // it is also required when ins is INS_ldrsw - // - if (EA_SIZE(attr) < EA_8BYTE) + if (addr->OperGet() == GT_LEA) + { + offset = (int) addr->AsAddrMode()->gtOffset; + if (addr->AsAddrMode()->gtScale > 0) { - attr = EA_8BYTE; + assert(isPow2(addr->AsAddrMode()->gtScale)); + BitScanForward(&lsl, addr->AsAddrMode()->gtScale); } + } - if (addr->isContained()) - { - assert(addr->OperGet() == GT_LCL_VAR || - addr->OperGet() == GT_LEA); + GenTree* memBase = indir->Base(); - int offset = 0; + if (indir->HasIndex()) + { + GenTree* index = indir->Index(); - if (addr->OperGet() == GT_LEA) - { - offset = (int) addr->AsAddrMode()->gtOffset; - } - GenTree* memBase = indir->Base(); + if (offset != 0) + { + regMaskTP tmpRegMask = indir->gtRsvdRegs; + regNumber tmpReg = genRegNumFromMask(tmpRegMask); + noway_assert(tmpReg != REG_NA); - if (indir->HasIndex()) + if (emitIns_valid_imm_for_add(offset, EA_8BYTE)) { - assert(offset == 0); - GenTree* index = indir->Index(); + if (lsl > 0) + { + // Generate code to set tmpReg = base + index*scale + emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL); + } + else // no scale + { + // Generate code to set tmpReg = base + index + emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum); + } - emitIns_R_R_R(ins, attr, node->gtRegNum, - memBase->gtRegNum, index->gtRegNum); + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + + // Then load/store dataReg from/to [tmpReg + offset] + emitIns_R_R_I(ins, ldstAttr, dataReg, tmpReg, offset);; } - else + else // large offset { - emitIns_R_R_I(ins, attr, node->gtRegNum, - memBase->gtRegNum, offset); - } - } - else - { - codeGen->genConsumeReg(addr); - emitIns_R_R(ins, attr, node->gtRegNum, addr->gtRegNum); - } - } - break; + // First load/store tmpReg with the large offset constant + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // Then add the base register + // rd = rd + base + emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum); - case GT_STOREIND: - { - assert(emitInsIsStore(ins)); + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + noway_assert(tmpReg != index->gtRegNum); - GenTreeIndir* indir = node->AsIndir(); - GenTree* addr = node->gtGetOp1(); - GenTree* data = node->gtGetOp2(); - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) - { - assert(data->IsZero()); - 
dataReg = REG_ZR; - } - else - { - assert(!data->isContained()); - codeGen->genConsumeReg(data); - dataReg = data->gtRegNum; - } - - // The value of 'ins' encodes the size to store - // we use EA_4BYTE here because it is the size of the register - // that we want to display when storing small values - // - if (EA_SIZE(attr) < EA_4BYTE) - { - attr = EA_4BYTE; + // Then load/store dataReg from/to [tmpReg + index*scale] + emitIns_R_R_R_I(ins, ldstAttr, dataReg, tmpReg, index->gtRegNum, lsl, INS_OPTS_LSL); + } } - - if (addr->isContained()) + else // (offset == 0) { - assert(addr->OperGet() == GT_LCL_VAR_ADDR || - addr->OperGet() == GT_LEA); - - int offset = 0; - - if (addr->OperGet() == GT_LEA) + if (lsl > 0) { - offset = (int) addr->AsAddrMode()->gtOffset; + // Then load/store dataReg from/to [memBase + index*scale] + emitIns_R_R_R_I(ins, ldstAttr, dataReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL); } - GenTree* memBase = indir->Base(); - - if (indir->HasIndex()) + else // no scale { - assert(offset == 0); - GenTree* index = indir->Index(); - - emitIns_R_R_R(ins, attr, dataReg, - memBase->gtRegNum, index->gtRegNum); + // Then load/store dataReg from/to [memBase + index] + emitIns_R_R_R(ins, ldstAttr, dataReg, memBase->gtRegNum, index->gtRegNum); } - else - { - emitIns_R_R_I(ins, attr, dataReg, - memBase->gtRegNum, offset); - } - } - else - { - codeGen->genConsumeReg(addr); - emitIns_R_R(ins, attr, dataReg, addr->gtRegNum); } } - break; - - case GT_STORE_LCL_VAR: + else // no Index register { - assert(emitInsIsStore(ins)); - - GenTreeLclVarCommon* varNode = node->AsLclVarCommon(); - - GenTree* data = node->gtOp.gtOp1->gtEffectiveVal(); - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) + if (emitIns_valid_imm_for_ldst_offset(offset, EA_SIZE(attr))) { - assert(data->IsZero()); - dataReg = REG_ZR; + // Then load/store dataReg from/to [memBase + offset] + emitIns_R_R_I(ins, ldstAttr, dataReg, memBase->gtRegNum, offset); } else { - assert(!data->isContained()); - codeGen->genConsumeReg(data); - dataReg = data->gtRegNum; - } + // We require a tmpReg to hold the offset + regMaskTP tmpRegMask = indir->gtRsvdRegs; + regNumber tmpReg = genRegNumFromMask(tmpRegMask); + noway_assert(tmpReg != REG_NA); - codeGen->inst_set_SV_var(varNode); - assert(varNode->gtRegNum == REG_NA); // stack store + // First load/store tmpReg with the large offset constant + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); - emitIns_S_R(ins, attr, dataReg, - varNode->GetLclNum(), 0); - - codeGen->genUpdateLife(varNode); + // Then load/store dataReg from/to [memBase + tmpReg] + emitIns_R_R_R(ins, ldstAttr, dataReg, memBase->gtRegNum, tmpReg); + } } - return; + } + else // addr is not contained, so we evaluate it into a register + { + codeGen->genConsumeReg(addr); + // Then load/store dataReg from/to [addrReg] + emitIns_R_R(ins, ldstAttr, dataReg, addr->gtRegNum); + } +} - default: - unreached(); + +// Generates an integer data section constant and returns a field handle representing +// the data offset to access the constant via a load instruction. 
+// This is called during ngen for any relocatable constants +// +CORINFO_FIELD_HANDLE emitter::emitLiteralConst(ssize_t cnsValIn, emitAttr attr /*=EA_8BYTE*/) +{ + ssize_t constValue = cnsValIn; + void * cnsAddr = &constValue; + bool dblAlign; + + if (attr == EA_4BYTE) + { + dblAlign = false; + } + else + { + assert(attr == EA_8BYTE); + dblAlign = true; } + + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. + + UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8; + UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign); + return emitComp->eeFindJitDataOffs(cnum); } // Generates a float or double data section constant and returns field handle representing @@ -10715,7 +10826,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, bool isMulOverflow = false; bool isUnsignedMul = false; instruction ins2 = INS_invalid; - regNumber extraReg = REG_ZR; + regNumber extraReg = REG_NA; if (dst->gtOverflowEx()) { if (ins == INS_add) @@ -10730,11 +10841,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { isMulOverflow = true; isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0); - regMaskTP tmpRegsMask = dst->gtRsvdRegs; - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP extraRegMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~extraRegMask; - extraReg = genRegNumFromMask(extraRegMask); ins2 = isUnsignedMul ? INS_umulh : INS_smulh; assert(intConst == nullptr); // overflow format doesn't support an int constant operand } @@ -10749,22 +10855,55 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } else { - emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); if (isMulOverflow) { - emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum); if (isUnsignedMul) { + assert(genCountBits(dst->gtRsvdRegs) == 1); + extraReg = genRegNumFromMask(dst->gtRsvdRegs); + + // Compute the high result + emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum); + emitIns_R_I(INS_cmp, EA_8BYTE, extraReg, 0); + codeGen->genCheckOverflow(dst); + + // Now multiply without skewing the high result if no overflow. + emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); } else { + // Make sure that we have an internal register + assert(genCountBits(dst->gtRsvdRegs) == 2); + + // There will be two bits set in tmpRegsMask. + // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask' + regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum); + regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask + extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask + + // Make sure the two registers are not the same. + assert(extraReg != dst->gtRegNum); + + // Save the high result in a temporary register + emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum); + + // Now multiply without skewing the high result. + emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + emitIns_R_R_I(INS_cmp, EA_8BYTE, extraReg, dst->gtRegNum, 63, INS_OPTS_ASR); + + codeGen->genCheckOverflow(dst); } } + else + { + // We can just multiply. 
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + } } - if (dst->gtOverflowEx()) + if (dst->gtOverflowEx() && !isMulOverflow) { assert(!varTypeIsFloating(dst)); codeGen->genCheckOverflow(dst); diff --git a/src/jit/emitarm64.h b/src/jit/emitarm64.h index defe9b87a3..5b418a8733 100644 --- a/src/jit/emitarm64.h +++ b/src/jit/emitarm64.h @@ -100,15 +100,18 @@ private: bool emitInsIsLoad (instruction ins); bool emitInsIsStore (instruction ins); bool emitInsIsLoadOrStore(instruction ins); + emitAttr emitInsAdjustLoadStoreAttr(instruction ins, emitAttr attr); emitAttr emitInsTargetRegSize(instrDesc *id); emitAttr emitInsLoadStoreSize(instrDesc *id); emitter::insFormat emitInsFormat(instruction ins); - emitter::code_t emitInsCode(instruction ins, insFormat fmt); - static unsigned emitOutput_Instr(BYTE *dst, code_t code); + // Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + index<<scale + offset] + void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); + // Emit the 32-bit Arm64 instruction 'code' into the 'dst' buffer + static unsigned emitOutput_Instr(BYTE *dst, code_t code); // A helper method to return the natural scale for an EA 'size' static unsigned NaturalScale_helper(emitAttr size); @@ -451,6 +454,9 @@ public: // true if this 'imm' can be encoded as a input operand to an alu instruction static bool emitIns_valid_imm_for_alu(INT64 imm, emitAttr size); + // true if this 'imm' can be encoded as the offset in a ldr/str instruction + static bool emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr size); + // true if 'imm' can use the left shifted by 12 bits encoding static bool canEncodeWithShiftImmBy12(INT64 imm); @@ -660,6 +666,13 @@ public: ssize_t imm, insOpts opt = INS_OPTS_NONE); + // Checks for a large immediate that needs a second instruction + void emitIns_R_R_Imm(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm); + void emitIns_R_R_R (instruction ins, emitAttr attr, regNumber reg1, diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index b83b308338..2437b1ec90 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -604,28 +604,6 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, s return 0; } -void emitter::AdjustDisp32ForRipRelative(instrDesc* id, BYTE** pDisp, const BYTE* addrOfNextInstr) -{ -#ifdef _TARGET_AMD64_ - BYTE* disp = *pDisp; - ssize_t relativeDisp = disp - addrOfNextInstr; - - if (id->idIsDspReloc()) - return; - - // If we can directly encode RIP-relative, do it - // otherwise use a reloc so the VM knows to generate a jump stub - if ((int)relativeDisp == relativeDisp) - { - *pDisp = (BYTE*)relativeDisp; - } - else - { - id->idSetIsDspReloc(); - } -#endif //_TARGET_AMD64_ -} - #ifdef _TARGET_AMD64_ /***************************************************************************** * Is the last instruction emitted a call instruction? 
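Editorial note (not part of the diff): the reworked ARM64 overflow-checked multiply above computes the high 64 bits of the product with smulh/umulh and compares them against the sign-extension of the low result (signed) or against zero (unsigned). A minimal portable sketch of that same test, using the GCC/Clang __int128 extension to model the 128-bit product; function names are illustrative only.

```cpp
// Sketch of the overflow test performed by the smulh/umulh sequences above.
// Assumes a compiler with the __int128 extension (GCC/Clang).
#include <cstdint>

bool MulOverflowsSigned(int64_t a, int64_t b)
{
    __int128 wide = (__int128)a * (__int128)b;
    int64_t  lo   = (int64_t)wide;          // what 'mul  dst, src1, src2' produces
    int64_t  hi   = (int64_t)(wide >> 64);  // what 'smulh extraReg, src1, src2' produces
    // Matches 'cmp extraReg, dst, ASR #63': no overflow iff the high half
    // is the sign-extension of the low half.
    return hi != (lo < 0 ? -1 : 0);
}

bool MulOverflowsUnsigned(uint64_t a, uint64_t b)
{
    unsigned __int128 wide = (unsigned __int128)a * (unsigned __int128)b;
    uint64_t hi = (uint64_t)(wide >> 64);   // what 'umulh' produces
    // Matches 'cmp extraReg, #0': no overflow iff the high half is zero.
    return hi != 0;
}
```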
@@ -1968,6 +1946,14 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code) /* The address is of the form "[disp]" */ size += sizeof(INT32); + +#ifdef _TARGET_AMD64_ + // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32 + if (!id->idIsDspReloc()) + { + size++; + } +#endif return size; case REG_EBP: AMD64_ONLY(case REG_R13:) @@ -2343,12 +2329,10 @@ void emitter::emitIns(instruction ins) #ifdef DEBUG #if FEATURE_STACK_FP_X87 -#if INLINE_MATH if (ins != INS_fabs && ins != INS_fsqrt && ins != INS_fsin && ins != INS_fcos) -#endif #endif // FEATURE_STACK_FP_X87 { @@ -2421,7 +2405,7 @@ void emitter::emitIns(instruction ins) emitCurIGsize += sz; } -#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) +#if !defined(LEGACY_BACKEND) // Add an instruction with no operands, but whose encoding depends on the size // (Only CDQ/CQO currently) void emitter::emitIns(instruction ins, emitAttr attr) @@ -2446,10 +2430,6 @@ void emitter::emitIns(instruction ins, emitAttr attr) dispIns(id); emitCurIGsize += sz; } -#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) - -#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) -// not amd64 specific, just calls back into stuff that is in codegenamd64 (for now) // fill in all the fields @@ -2466,12 +2446,15 @@ void emitter::emitHandleMemOp(GenTree* mem, instrDesc* id, bool isSrc) // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) { - if (emitComp->opts.compReloc) - { - // Mark idInfo()->idDspReloc to remember that the - // address mode has a displacement that is relocatable - id->idSetIsDspReloc(); - } + // Contract: + // fgMorphField() changes any statics that won't fit into 32-bit addresses into + // constants with an indir, rather than GT_CLS_VAR, based on reloc type hint given + // by VM. Hence emitter should always mark GT_CLS_VAR_ADDR as relocatable. + // + // Data section constants: these get allocated close to code block of the method and + // always addressable IP relative. These too should be marked as relocatable. + + id->idSetIsDspReloc(); } id->idAddr()->iiaFieldHnd = fldHnd; @@ -2484,12 +2467,19 @@ void emitter::emitHandleMemOp(GenTree* mem, instrDesc* id, bool isSrc) id->idInsFmt(IF_MRD_RRD); } } - else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained() && memBase->AsIntConCommon()->FitsInAddrBase(emitComp)) + else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained()) { - if (emitComp->opts.compReloc && memBase->IsIconHandle()) + // Absolute addresses marked as contained should fit within the base of addr mode. 
+ assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); + + // Either not generating relocatable code or addr must be an icon handle + assert(!emitComp->opts.compReloc || memBase->IsIconHandle()); + + if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp)) { id->idSetIsDspReloc(); } + if (isSrc) { id->idInsFmt(IF_RRD_ARD); @@ -2594,16 +2584,28 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) } else { - assert (mem->Addr()->OperIsAddrMode() || - mem->Addr()->gtOper == GT_CLS_VAR_ADDR || - (mem->Addr()->IsCnsIntOrI() && mem->Addr()->isContained()) || - !mem->Addr()->isContained()); + GenTreePtr addr = mem->Addr(); + + assert (addr->OperIsAddrMode() || + addr->gtOper == GT_CLS_VAR_ADDR || + (addr->IsCnsIntOrI() && addr->isContained()) || + !addr->isContained()); size_t offset = mem->Offset(); id = emitNewInstrAmd(attr, offset); id->idIns(ins); id->idReg1(node->gtRegNum); id->idInsFmt(IF_RWR_ARD); emitHandleMemOp(node, id, true); // may overwrite format + + if (addr->IsCnsIntOrI() && addr->isContained()) + { + // Absolute addresses marked as contained should fit within the base of addr mode. + assert(addr->AsIntConCommon()->FitsInAddrBase(emitComp)); + + // Case of "ins re, [disp]" and should use IF_RWR_ARD as format + id->idInsFmt(IF_RWR_ARD); + } + sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); } @@ -2646,6 +2648,7 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) } return; } + if (data->isContainedIntOrIImmed()) { int icon = (int) data->AsIntConCommon()->IconValue(); @@ -2653,6 +2656,16 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) id->idIns(ins); id->idInsFmt(IF_AWR_CNS); emitHandleMemOp(node, id, false); // may overwrite format + + if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained()) + { + // Absolute addresses marked as contained should fit within the base of addr mode. + assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); + + // Case of "ins [disp], immed " and should use IF_AWR_CNS as format + id->idInsFmt(IF_AWR_CNS); + } + sz = emitInsSizeAM(id, insCodeMI(ins), icon); id->idCodeSize(sz); } @@ -2699,6 +2712,12 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) emitCurIGsize += sz; } +CORINFO_FIELD_HANDLE emitter::emitLiteralConst(ssize_t cnsValIn, emitAttr attr /*= EA_8BYTE*/) +{ + NYI("emitLiteralConst"); + return nullptr; +} + // Generates a float or double data section constant and returns field handle representing // the data offset to access the constant. This is called by emitInsBinary() in case // of contained float of double constants. 
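Editorial note (not part of the diff): several hunks above add one byte to emitInsSizeAM for a non-relocated "[disp]" operand on AMD64, and later emit ModRM 0x04 plus a SIB byte of 0x25. The reason is an encoding rule of x86-64: ModRM mod=00/rm=101 means RIP-relative disp32, so an absolute 32-bit address needs the SIB form instead. A small self-contained sketch of the two encodings for "mov eax, [disp32]"; the helper name is illustrative and a little-endian host is assumed.

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

std::vector<uint8_t> EncodeMovEaxFromDisp32(uint32_t disp, bool ripRelative)
{
    std::vector<uint8_t> bytes;
    bytes.push_back(0x8B);                 // mov r32, r/m32
    if (ripRelative)
    {
        bytes.push_back(0x05);             // ModRM: mod=00, reg=eax, rm=101 -> [RIP + disp32]
    }
    else
    {
        bytes.push_back(0x04);             // ModRM: mod=00, reg=eax, rm=100 -> SIB follows
        bytes.push_back(0x25);             // SIB: no index, base=disp32 -> the extra byte
    }
    uint8_t le[4];
    std::memcpy(le, &disp, 4);             // disp32, little-endian as on x86
    bytes.insert(bytes.end(), le, le + 4);
    return bytes;                          // 6 bytes RIP-relative, 7 bytes absolute
}
```

This is why relocatable (RIP-relative) accesses stay at the old size while the newly allowed absolute-address form costs one additional SIB byte.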
@@ -3046,14 +3065,24 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G */ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr, GenTree* src) { - GenTreePtr mem = dstAddr->gtOp.gtOp1; + assert(dstAddr->isIndir()); + GenTreeIndir* indir = dstAddr->AsIndir(); + GenTreePtr mem = indir->Addr(); assert(mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR || + mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LEA || - mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR); + mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR || + mem->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); instrDesc* id = nullptr; UNATIVE_OFFSET sz; + size_t offset = 0; + if (mem->gtSkipReloadOrCopy()->OperGet() != GT_CLS_VAR_ADDR) + { + offset = indir->Offset(); + } + // find immed (if any) - it cannot be a dst GenTreeIntConCommon* intConst = nullptr; if (src->isContainedIntOrIImmed()) @@ -3061,26 +3090,13 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr, GenTr intConst = src->AsIntConCommon(); } - if (intConst) + if (intConst != nullptr) { - if (mem->OperGet() == GT_LEA) - { - id = emitNewInstrAmdCns(attr, mem->AsAddrMode()->gtOffset, (int) intConst->IconValue()); - } - else // it's just an indir, no offset - { - id = emitNewInstrAmdCns(attr, 0, (int) intConst->IconValue()); - } + id = emitNewInstrAmdCns(attr, offset, (int) intConst->IconValue()); } else { // ind, reg - size_t offset = 0; - if (mem->OperIsAddrMode() && mem->isContained()) - { - offset = mem->AsAddrMode()->gtOffset; - } - id = emitNewInstrAmd(attr, offset); // there must be one non-contained src assert(!src->isContained()); @@ -3094,26 +3110,26 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr, GenTr if (src->isContainedIntOrIImmed()) { - if (mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR) - { - id->idInsFmt(IF_MRW_CNS); - } - else - { - id->idInsFmt(IF_ARW_CNS); - } + if (mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR) + { + id->idInsFmt(IF_MRW_CNS); + } + else + { + id->idInsFmt(IF_ARW_CNS); + } sz = emitInsSizeAM(id, insCodeMI(ins), (int) intConst->IconValue()); } else { - if (mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR) - { - id->idInsFmt(IF_MRW_RRD); - } - else - { - id->idInsFmt(IF_ARW_RRD); - } + if (mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR) + { + id->idInsFmt(IF_MRW_RRD); + } + else + { + id->idInsFmt(IF_ARW_RRD); + } sz = emitInsSizeAM(id, insCodeMR(ins)); } @@ -3139,17 +3155,19 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr, GenTr void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr) { assert(ins == INS_not || ins == INS_neg); - - GenTreePtr mem = dstAddr->gtOp.gtOp1; - assert(mem->OperGet() == GT_LCL_VAR || - mem->OperGet() == GT_CLS_VAR_ADDR || - mem->OperGet() == GT_LEA || - (mem->OperGet() == GT_COPY && mem->gtGetOp1()->OperGet() == GT_LCL_VAR)); + assert(dstAddr->isIndir()); + GenTreeIndir* indir = dstAddr->AsIndir(); + GenTreePtr mem = indir->Addr(); + assert(mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR || + mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR_ADDR || + mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR || + mem->gtSkipReloadOrCopy()->OperGet() == GT_LEA || + mem->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); size_t offset = 0; - if (mem->OperIsAddrMode() && mem->isContained()) + if (mem->gtSkipReloadOrCopy()->OperGet() != GT_CLS_VAR_ADDR) { - offset 
= mem->AsAddrMode()->gtOffset; + offset = indir->Offset(); } instrDesc* id = emitNewInstrAmd(attr, offset); @@ -3157,15 +3175,15 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr) emitHandleMemOp(dstAddr, id, true); id->idIns(ins); - - if(mem->OperGet() == GT_CLS_VAR_ADDR) - { - id->idInsFmt(IF_MRW); - } - else - { - id->idInsFmt(IF_ARW); - } + + if(mem->OperGet() == GT_CLS_VAR_ADDR) + { + id->idInsFmt(IF_MRW); + } + else + { + id->idInsFmt(IF_ARW); + } UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); @@ -3174,7 +3192,7 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr) emitCurIGsize += sz; } -#endif // _TARGET_XARCH_ && !LEGACY_BACKEND +#endif // !LEGACY_BACKEND #if FEATURE_STACK_FP_X87 /***************************************************************************** @@ -3243,14 +3261,14 @@ void emitter::emitIns_R(instruction ins, sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix) -#else // _TARGET_AMD64_ +#else // !_TARGET_AMD64_ if (size == EA_1BYTE) sz = 2; // Use the long form as the small one has no 'w' bit else sz = 1; // Use short form -#endif // _TARGET_AMD64_ +#endif // !_TARGET_AMD64_ break; @@ -3814,8 +3832,7 @@ void emitter::emitIns_R_C(instruction ins, UNATIVE_OFFSET sz; instrDesc* id; - /* Are we MOV'ing the offset of the class variable into EAX? */ - + // Are we MOV'ing the offset of the class variable into EAX? if (EA_IS_OFFSET(attr)) { id = emitNewInstrDsp(EA_1BYTE, offs); @@ -3824,8 +3841,7 @@ void emitter::emitIns_R_C(instruction ins, assert(ins == INS_mov && reg == REG_EAX); - /* Special case: "mov eax, [addr]" is smaller */ - + // Special case: "mov eax, [addr]" is smaller sz = 1 + sizeof(void*); } else @@ -3836,21 +3852,24 @@ void emitter::emitIns_R_C(instruction ins, id->idIns(ins); id->idInsFmt(fmt); - /* Special case: "mov eax, [addr]" is smaller */ - - if (ins == INS_mov && reg == REG_EAX) +#ifdef _TARGET_X86_ + // Special case: "mov eax, [addr]" is smaller. + // This case is not enabled for amd64 as it always uses RIP relative addressing + // and it results in smaller instruction size than encoding 64-bit addr in the + // instruction. + if (ins == INS_mov && reg == REG_EAX) { sz = 1 + sizeof(void*); if (size == EA_2BYTE) sz += 1; } else +#endif //_TARGET_X86_ { sz = emitInsSizeCV(id, insCodeRM(ins)); } - /* Special case: mov reg, fs:[ddd] */ - + // Special case: mov reg, fs:[ddd] if (fldHnd == FLD_GLOBAL_FS) sz += 1; } @@ -3909,21 +3928,24 @@ void emitter::emitIns_C_R (instruction ins, UNATIVE_OFFSET sz; - /* Special case: "mov [addr], EAX" is smaller */ - - if (ins == INS_mov && reg == REG_EAX) +#ifdef _TARGET_X86_ + // Special case: "mov [addr], EAX" is smaller. + // This case is not enable for amd64 as it always uses RIP relative addressing + // and it will result in smaller instruction size than encoding 64-bit addr in + // the instruction. + if (ins == INS_mov && reg == REG_EAX) { sz = 1 + sizeof(void*); if (size == EA_2BYTE) sz += 1; } else +#endif //_TARGET_X86_ { sz = emitInsSizeCV(id, insCodeMR(ins)); } - /* Special case: mov reg, fs:[ddd] */ - + // Special case: mov reg, fs:[ddd] if (fldHnd == FLD_GLOBAL_FS) { sz += 1; @@ -4053,10 +4075,17 @@ void emitter::emitIns_J_S (instruction ins, #if RELOC_SUPPORT // Storing the address of a basicBlock will need a reloc // as the instruction uses the absolute address, - // not a relative address + // not a relative address. 
+ // + // On Amd64, Absolute code addresses should always go through a reloc to + // to be encoded as RIP rel32 offset. +#ifndef _TARGET_AMD64_ if (emitComp->opts.compReloc) - id->idSetIsDspReloc(); #endif + { + id->idSetIsDspReloc(); + } +#endif //RELOC_SUPPORT id->idCodeSize(sz); @@ -5519,19 +5548,17 @@ void emitter::emitIns_Call(EmitCallType callType, id->idSetIsNoGC(isNoGC); - /* Record the address: method, indirection, or funcptr */ - + // Record the address: method, indirection, or funcptr if (callType >= EC_FUNC_VIRTUAL) { - /* This is an indirect call (either a virtual call or func ptr call) */ + // This is an indirect call (either a virtual call or func ptr call) switch (callType) { case EC_INDIR_C: - - if (emitComp->opts.compReloc) - id->idSetIsDspReloc(); - + // Indirect call using an absolute code address. + // Must be marked as relocatable and is done at the + // branch target location. goto CALL_ADDR_MODE; case EC_INDIR_R: // the address is in a register @@ -5560,17 +5587,35 @@ void emitter::emitIns_Call(EmitCallType callType, // fall-through - /* The function is "ireg" if id->idIsCallRegPtr(), - else [ireg+xmul*xreg+disp] */ + // The function is "ireg" if id->idIsCallRegPtr(), + // else [ireg+xmul*xreg+disp] id->idInsFmt(IF_ARD); id->idAddr()->iiaAddrMode.amBaseReg = ireg; - id->idAddr()->iiaAddrMode.amIndxReg = xreg; id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1; sz = emitInsSizeAM(id, insCodeMR(INS_call)); + + if (ireg == REG_NA && xreg == REG_NA) + { + if (codeGen->genCodeIndirAddrNeedsReloc(disp)) + { + id->idSetIsDspReloc(); + } +#ifdef _TARGET_AMD64_ + else + { + // An absolute indir address that doesn't need reloc should fit within 32-bits + // to be encoded as offset relative to zero. This addr mode requires an extra + // SIB byte + noway_assert((int)addr == (size_t)addr); + sz++; + } +#endif //_TARGET_AMD64_ + } + break; default: @@ -5583,22 +5628,31 @@ void emitter::emitIns_Call(EmitCallType callType, { /* "call [method_addr]" */ - assert(addr != NULL); + assert(addr != nullptr); id->idInsFmt(IF_METHPTR); id->idAddr()->iiaAddr = (BYTE*)addr; sz = 6; #if RELOC_SUPPORT - if (emitComp->opts.compReloc) + // Since this is an indirect call through a pointer and we don't + // currently pass in emitAttr into this function, we query codegen + // whether addr needs a reloc. + if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr)) { - // Since this is an indirect call through a pointer and we don't - // currently pass in emitAttr into this function we have decided - // to always mark the displacement as being relocatable. - id->idSetIsDspReloc(); } -#endif +#ifdef _TARGET_AMD64_ + else + { + // An absolute indir address that doesn't need reloc should fit within 32-bits + // to be encoded as offset relative to zero. 
This addr mode requires an extra + // SIB byte + noway_assert((int)addr == (size_t)addr); + sz++; + } +#endif //_TARGET_AMD64_ +#endif //RELOC_SUPPORT } else @@ -5607,7 +5661,7 @@ void emitter::emitIns_Call(EmitCallType callType, assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR); - assert(addr != NULL); + assert(addr != nullptr); id->idInsFmt(IF_METHOD); sz = 5; @@ -5617,18 +5671,15 @@ void emitter::emitIns_Call(EmitCallType callType, if (callType == EC_FUNC_ADDR) { id->idSetIsCallAddr(); + } #if RELOC_SUPPORT - if (emitComp->opts.compReloc) - { - // Since this is an indirect call through a pointer and we don't - // currently pass in emitAttr into this function we have decided - // to always mark the displacement as being relocatable. - - id->idSetIsDspReloc(); - } -#endif + // Direct call to a method and no addr indirection is needed. + if (codeGen->genCodeAddrNeedsReloc((size_t)addr)) + { + id->idSetIsDspReloc(); } +#endif } #ifdef DEBUG @@ -5811,10 +5862,6 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varN const char* rn = emitComp->compRegVarName(reg, varName); -#ifdef _TARGET_ARM_ - assert(strlen(rn) >= 1); -#endif - #ifdef _TARGET_AMD64_ char suffix = '\0'; @@ -6522,7 +6569,7 @@ void emitter::emitDispIns(instrDesc* id, /* Display a data section reference */ assert((unsigned)offs < emitConsDsc.dsdOffs); - addr = emitConsBlock ? emitConsBlock + offs : NULL; + addr = emitConsBlock ? emitConsBlock + offs : nullptr; #if 0 // TODO-XArch-Cleanup: Fix or remove this code. @@ -7428,7 +7475,7 @@ static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes) *dst++ = 0x00; break; } -#endif // !_TARGET_AMD64_ +#endif // _TARGET_AMD64_ return dst; } @@ -7639,20 +7686,6 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* GOT_DSP: - // Adjust for RIP-relative addressing - if ((rgx == REG_NA) && (reg == REG_NA)) - { - BYTE* end = dst + 2 + 4; - if (addc) - { - if (opsz == 0 || opsz == 8) - end += 4; - else - end += opsz; - } - AdjustDisp32ForRipRelative(id, (BYTE**)&dsp, emitOffsetToPtr(emitCurCodeOffs(end))); - } - dspInByte = ((signed char)dsp == (ssize_t)dsp); dspIsZero = (dsp == 0); @@ -7670,30 +7703,70 @@ GOT_DSP: switch (reg) { case REG_NA: + if (id->idIsDspReloc()) + { + INT32 addlDelta = 0; - // The address is of the form "[disp]" - dst += emitOutputWord(dst, code | 0x0500); -#ifdef _TARGET_X86_ + // The address is of the form "[disp]" + // On x86 - disp is relative to zero + // On Amd64 - disp is relative to RIP + dst += emitOutputWord(dst, code | 0x0500); - dst += emitOutputLong(dst, dsp); + if (addc) + { + // It is of the form "ins [disp], immed" + // For emitting relocation, we also need to take into account of the + // additional bytes of code emitted for immed val. - if (id->idIsDspReloc()) - { - emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32); - } -#else // AMD64 - if (id->idIsDspReloc()) - { - // im not sure why on amd64 this has to be different. The code here before indicates it didn't - // have to be at the time grant first started working on amd64. 
+ ssize_t cval = addc->cnsVal; + +#ifdef _TARGET_AMD64_ + // all these opcodes only take a sign-extended 4-byte immediate + noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); +#else + noway_assert(opsz <= 4); +#endif + + switch (opsz) + { + case 0: + case 4: + case 8: addlDelta = -4; break; + case 2: addlDelta = -2; break; + case 1: addlDelta = -1; break; + + default: + assert(!"unexpected operand size"); + unreached(); + } + } + +#ifdef _TARGET_AMD64_ + // We emit zero on Amd64, to avoid the assert in emitOutputLong() dst += emitOutputLong(dst, 0); - emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32); +#else + dst += emitOutputLong(dst, dsp); +#endif + emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0, addlDelta); } else { +#ifdef _TARGET_X86_ + dst += emitOutputWord(dst, code | 0x0500); +#else //_TARGET_AMD64_ + // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero. + // This addr mode should never be used while generating relocatable ngen code nor if + // the addr can be encoded as pc-relative address. + noway_assert(!emitComp->opts.compReloc); + noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32); + noway_assert((int)dsp == dsp); + + // This requires, specifying a SIB byte after ModRM byte. + dst += emitOutputWord(dst, code | 0x0400); + dst += emitOutputByte(dst, 0x25); +#endif //_TARGET_AMD64_ dst += emitOutputLong(dst, dsp); } -#endif break; @@ -8501,20 +8574,14 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* opsz = 1; } } +#ifdef _TARGET_X86_ else { - /* Special case: "mov eax, [addr]" and "mov [addr], eax" */ - /* TODO-AMD64-CQ: Because on 64-bit the offset is 8-bytes/64-bits, - this encoding is larger than using a regular 'mov' - encoding and RIP-relative addressing (only 5 bytes for - modR/M and 32-bit displacement, versus 8 bytes moffset). - - If we don't need a reloc and we can use RIP-relative - addressing, we should use the original encoding and - avoid the 8 byte moffset... - */ - - if (ins == INS_mov && id->idReg1() == REG_EAX) + // Special case: "mov eax, [addr]" and "mov [addr], eax" + // Amd64: this is one case where addr can be 64-bit in size. This is + // currently unused or not enabled on amd64 as it always uses RIP + // relative addressing which results in smaller instruction size. 
+ if (ins == INS_mov && id->idReg1() == REG_EAX) { switch (id->idInsFmt()) { @@ -8541,6 +8608,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* } } } +#endif //_TARGET_X86_ // Special case emitting AVX instructions if (Is4ByteAVXInstruction(ins)) @@ -8635,8 +8703,8 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* if (code) { if (id->idInsFmt() == IF_MRD_OFF || - id->idInsFmt() == IF_RWR_MRD_OFF || - isMoffset) + id->idInsFmt() == IF_RWR_MRD_OFF || + isMoffset) dst += emitOutputByte(dst, code); else dst += emitOutputWord(dst, code); @@ -8665,14 +8733,17 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* else { // Special case: mov reg, fs:[ddd] or mov reg, [ddd] - if (jitStaticFldIsGlobAddr(fldh)) - addr = NULL; + if (jitStaticFldIsGlobAddr(fldh)) + { + addr = nullptr; + } else { - addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, - NULL); - if (addr == NULL) + addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr); + if (addr == nullptr) + { NO_WAY("could not obtain address of static field"); + } } } @@ -8680,27 +8751,61 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* if (!isMoffset) { - BYTE* end = dst + 4; + INT32 addlDelta = 0; + if (addc) { - if (opsz == 0 || opsz == 8) - end += 4; - else - end += opsz; + // It is of the form "ins [disp], immed" + // For emitting relocation, we also need to take into account of the + // additional bytes of code emitted for immed val. + + ssize_t cval = addc->cnsVal; + +#ifdef _TARGET_AMD64_ + // all these opcodes only take a sign-extended 4-byte immediate + noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); +#else + noway_assert(opsz <= 4); +#endif + + switch (opsz) + { + case 0: + case 4: + case 8: addlDelta = -4; break; + case 2: addlDelta = -2; break; + case 1: addlDelta = -1; break; + + default: + assert(!"unexpected operand size"); + unreached(); + } } - AdjustDisp32ForRipRelative(id, &target, end); +#ifdef _TARGET_AMD64_ + // All static field and data section constant accesses should be marked as relocatable + noway_assert(id->idIsDspReloc()); + dst += emitOutputLong(dst, 0); +#else //_TARGET_X86_ dst += emitOutputLong(dst, (int)target); +#endif //_TARGET_X86_ #ifdef RELOC_SUPPORT if (id->idIsDspReloc()) { - emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32); + emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta); } #endif } else { +#ifdef _TARGET_AMD64_ + // This code path should never be hit on amd64 since it always uses RIP relative addressing. + // In future if ever there is a need to enable this special case, also enable the logic + // that sets isMoffset to true on amd64. + unreached(); +#else //_TARGET_X86_ + dst += emitOutputSizeT(dst, (ssize_t)target); #ifdef RELOC_SUPPORT @@ -8709,6 +8814,8 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* emitRecordRelocation((void*)(dst - sizeof(void*)), target, IMAGE_REL_BASED_MOFFSET); } #endif + +#endif //_TARGET_X86_ } // Now generate the constant value, if present @@ -10033,7 +10140,7 @@ BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i) dst += emitOutputByte(dst, insCode(ins)); // For forward jumps, record the address of the distance value - id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL; + id->idjTemp.idjAddr = (distVal > 0) ? 
dst : nullptr; dst += emitOutputByte(dst, distVal); } @@ -10101,6 +10208,7 @@ BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i) idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA; idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA; emitSetAmdDisp(idAmd, distVal); // set the displacement + idAmd->idSetIsDspReloc(id->idIsDspReloc()); assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins)); @@ -10115,7 +10223,7 @@ BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i) // For forward jumps, record the address of the distance value // Hard-coded 4 here because we already output the displacement, as the last thing. - id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : NULL; + id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr; // We're done return dst; @@ -10331,54 +10439,70 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** sz = sizeof(instrDesc); } - // What kind of a call do we have here? - if (id->idIsCallAddr()) + addr = (BYTE*)id->idAddr()->iiaAddr; + assert(addr != nullptr); + + // Some helpers don't get recorded in GC tables + if (id->idIsNoGC()) { - // This is call indirect where we know the target, thus we can - // use a direct call; the target to jump to is in iiaAddr. - assert(id->idInsFmt() == IF_METHOD); - addr = (BYTE*)id->idAddr()->iiaAddr; + recCall = false; } - else - { - // Some helpers don't get recorded in GC tables - if (id->idIsNoGC()) - recCall = false; - // Static method call - addr = id->idAddr()->iiaAddr; + // What kind of a call do we have here? + if (id->idInsFmt() == IF_METHPTR) + { + // This is call indirect via a method pointer - if (id->idInsFmt() == IF_METHPTR) + code = insCodeMR(ins); + if (ins == INS_i_jmp) { - // This is a call via a global method pointer - assert(addr); - - code = insCodeMR(ins); - if (ins == INS_i_jmp) - code |= 1; - - AdjustDisp32ForRipRelative(id, &addr, dst + 6); + code |= 1; + } + if (id->idIsDspReloc()) + { dst += emitOutputWord(dst, code | 0x0500); +#ifdef _TARGET_AMD64_ + dst += emitOutputLong(dst, 0); +#else dst += emitOutputLong(dst, (int)addr); - -#ifdef RELOC_SUPPORT - if (id->idIsDspReloc()) - { - emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32); - } #endif - goto DONE_CALL; + emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32); + } + else + { +#ifdef _TARGET_X86_ + dst += emitOutputWord(dst, code | 0x0500); +#else //_TARGET_AMD64_ + // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero. + // This addr mode should never be used while generating relocatable ngen code nor if + // the addr can be encoded as pc-relative address. + noway_assert(!emitComp->opts.compReloc); + noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32); + noway_assert((int)addr == (ssize_t)addr); + + // This requires, specifying a SIB byte after ModRM byte. + dst += emitOutputWord(dst, code | 0x0400); + dst += emitOutputByte(dst, 0x25); +#endif //_TARGET_AMD64_ + dst += emitOutputLong(dst, (int)addr); } + goto DONE_CALL; } + // Else + // This is call direct where we know the target, thus we can + // use a direct call; the target to jump to is in iiaAddr. + assert(id->idInsFmt() == IF_METHOD); + // Output the call opcode followed by the target distance dst += (ins == INS_l_jmp) ? 
emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins)); ssize_t offset; #ifdef _TARGET_AMD64_ - // All REL32 on Amd go through recordRelocation. Here we will output zero to advance dst. + // All REL32 on Amd64 go through recordRelocation. Here we will output zero to advance dst. offset = 0; + assert(id->idIsDspReloc()); #else // Calculate PC relative displacement. // Although you think we should be using sizeof(void*), the x86 and x64 instruction set @@ -10388,12 +10512,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dst += emitOutputLong(dst, offset); -#ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation - if (emitComp->opts.compReloc) -#endif +#ifdef RELOC_SUPPORT + if (id->idIsDspReloc()) { emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32); } +#endif DONE_CALL: @@ -10837,34 +10961,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** #endif // FEATURE_STACK_FP_X87 - dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500); - - if (ins == INS_call) - { -#if 0 - // TODO-XArch-Cleanup: Fix or remove this code. - // All arguments will be popped after the call - emitStackPop(dst, true, emitGetInsDspCns(id, &offs)); - - // Figure out the size of the instruction descriptor - if (id->idIsLargeCall()) - sz = sizeof(instrDescDCGC); - else - sz = emitSizeOfInsDsc(id); - - // Do we need to record a call location for GC purposes? - if (!emitFullGCinfo) - scRecordGCcall(dst); - -#else - assert(!"what???????"); -#endif - - } - else - { - sz = emitSizeOfInsDsc(id); - } + noway_assert(ins != INS_call); + dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500); + sz = emitSizeOfInsDsc(id); break; case IF_MRD_OFF: diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index d236b443d5..7c9b66d265 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -494,8 +494,6 @@ public: bool isJump = false, bool isNoGC = false); - void AdjustDisp32ForRipRelative(instrDesc *id, BYTE ** pDisp, const BYTE * addrOfNextInstr); - #ifdef _TARGET_AMD64_ // Is the last instruction emitted a call instruction? 
bool emitIsLastInsCall(); diff --git a/src/jit/error.cpp b/src/jit/error.cpp index 370a452dd1..3fc75ac5f4 100644 --- a/src/jit/error.cpp +++ b/src/jit/error.cpp @@ -101,16 +101,33 @@ void DECLSPEC_NORETURN noWayAssertBody() fatal(CORJIT_RECOVERABLEERROR); } -/*****************************************************************************/ -inline static bool ShouldThrowOnNoway() + +inline static bool ShouldThrowOnNoway( +#ifdef FEATURE_TRACELOGGING + const char* filename, unsigned line +#endif +) { - return GetTlsCompiler() == NULL || GetTlsCompiler()->compShouldThrowOnNoway(); + return GetTlsCompiler() == NULL || + GetTlsCompiler()->compShouldThrowOnNoway( +#ifdef FEATURE_TRACELOGGING + filename, line +#endif + ); } /*****************************************************************************/ -void noWayAssertBodyConditional() +void noWayAssertBodyConditional( +#ifdef FEATURE_TRACELOGGING + const char* filename, unsigned line +#endif +) { +#ifdef FEATURE_TRACELOGGING + if (ShouldThrowOnNoway(filename, line)) +#else if (ShouldThrowOnNoway()) +#endif // FEATURE_TRACELOGGING { noWayAssertBody(); } @@ -335,14 +352,14 @@ BOOL vlogf(unsigned level, const char* fmt, va_list args) return(LogEnv::cur()->compHnd->logMsg(level, fmt, args)); } -void logf_stdout(const char* fmt, va_list args) +int logf_stdout(const char* fmt, va_list args) { // // Fast logging to stdout // const int BUFF_SIZE = 8192; char buffer[BUFF_SIZE]; - _vsnprintf_s(&buffer[0], BUFF_SIZE, _TRUNCATE, fmt, args); + int written = _vsnprintf_s(&buffer[0], BUFF_SIZE, _TRUNCATE, fmt, args); static ConfigDWORD fJitDumpToDebugger; if (fJitDumpToDebugger.val(CLRConfig::INTERNAL_JitDumpToDebugger)) @@ -374,13 +391,16 @@ void logf_stdout(const char* fmt, va_list args) fputs(&buffer[0], stdout); #endif // CROSSGEN_COMPILE } + + return written; } /*********************************************************************/ -void logf(const char* fmt, ...) +int logf(const char* fmt, ...) { va_list args; static bool logToEEfailed = false; + int written = 0; // // We remember when the EE failed to log, because vlogf() // is very slow in a checked build. @@ -400,7 +420,7 @@ void logf(const char* fmt, ...) { // if the EE refuses to log it, we try to send it to stdout va_start(args, fmt); - logf_stdout(fmt, args); + written = logf_stdout(fmt, args); va_end(args); } #if 0 // Enable this only when you need it @@ -431,8 +451,66 @@ void logf(const char* fmt, ...) } #endif // 0 va_end(args); + + return written; } +/*********************************************************************/ +void gcDump_logf(const char* fmt, ...) +{ + va_list args; + static bool logToEEfailed = false; + // + // We remember when the EE failed to log, because vlogf() + // is very slow in a checked build. + // + // If it fails to log an LL_INFO1000 message once + // it will always fail when logging an LL_INFO1000 message. 
+ // + if (!logToEEfailed) + { + va_start(args, fmt); + if (!vlogf(LL_INFO1000, fmt, args)) + logToEEfailed = true; + va_end(args); + } + + if (logToEEfailed) + { + // if the EE refuses to log it, we try to send it to stdout + va_start(args, fmt); + logf_stdout(fmt, args); + va_end(args); + } +#if 0 // Enable this only when you need it + else + { + // + // The EE just successfully logged our message + // + static ConfigDWORD fJitBreakOnDumpToken; + DWORD breakOnDumpToken = fJitBreakOnDumpToken.val(CLRConfig::INTERNAL_BreakOnDumpToken); + static DWORD forbidEntry = 0; + + if ((breakOnDumpToken != 0xffffffff) && (forbidEntry == 0)) + { + forbidEntry = 1; + + // Use value of 0 to get the dump + static DWORD currentLine = 1; + + if (currentLine == breakOnDumpToken) + { + assert(!"Dump token reached"); + } + + printf("(Token=0x%x) ", currentLine++); + forbidEntry = 0; + } + } +#endif // 0 + va_end(args); +} /*********************************************************************/ void logf(unsigned level, const char* fmt, ...) @@ -468,7 +546,11 @@ void noWayAssertAbortHelper(const char * cond, const char * file, unsigned line) void noWayAssertBodyConditional(const char * cond, const char * file, unsigned line) { +#ifdef FEATURE_TRACELOGGING + if (ShouldThrowOnNoway(file, line)) +#else if (ShouldThrowOnNoway()) +#endif { noWayAssertBody(cond, file, line); } diff --git a/src/jit/error.h b/src/jit/error.h index 4ecc2fac81..d93df21e27 100644 --- a/src/jit/error.h +++ b/src/jit/error.h @@ -77,7 +77,11 @@ extern void DECLSPEC_NORETURN noWayAssertBody(const char * cond, const char * fi // Conditionally invoke the noway assert body. The conditional predicate is evaluated using a method on the tlsCompiler. // If a noway_assert is hit, we ask the Compiler whether to raise an exception (i.e., conditionally raise exception.) // To have backward compatibility between v4.5 and v4.0, in min-opts we take a shot at codegen rather than rethrow. -extern void noWayAssertBodyConditional(); +extern void noWayAssertBodyConditional( +#ifdef FEATURE_TRACELOGGING + const char * file, unsigned line +#endif +); extern void noWayAssertBodyConditional(const char * cond, const char * file, unsigned line); #if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND) @@ -158,7 +162,14 @@ extern void notYetImplemented(const char * msg, const char * file, unsigned line #define NO_WAY(msg) noWay() #define BADCODE(msg) badCode() #define BADCODE3(msg, msg2, arg) badCode() -#define noway_assert(cond) do { if (!(cond)) { noWayAssertBodyConditional(); } } while (0) + +#ifdef FEATURE_TRACELOGGING +#define NOWAY_ASSERT_BODY_ARGUMENTS __FILE__, __LINE__ +#else +#define NOWAY_ASSERT_BODY_ARGUMENTS +#endif + +#define noway_assert(cond) do { if (!(cond)) { noWayAssertBodyConditional(NOWAY_ASSERT_BODY_ARGUMENTS); } } while (0) #define unreached() noWayAssertBody() #endif diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index c26f221c3f..bd1f559f51 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -625,6 +625,9 @@ GenTreeStmt* Compiler::fgInsertStmtNearEnd(BasicBlock* block, GenTreePtr node) { GenTreeStmt* stmt; + // This routine is not aware of embedded stmts and can only be used when in tree order. 
+ assert(fgOrder == FGOrderTree); + if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH) || (block->bbJumpKind == BBJ_RETURN)) @@ -2357,7 +2360,7 @@ void Compiler::fgDfsInvPostOrder() noway_assert(fgBBNumMax == fgBBcount); #ifdef DEBUG - if (verbose) + if (0 && verbose) { printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n"); for (unsigned i = 1; i <= fgBBNumMax; ++i) @@ -2723,9 +2726,11 @@ void Compiler::fgBuildDomTree() noway_assert(preNum == fgBBNumMax + 1); noway_assert(postNum == fgBBNumMax + 1); + noway_assert(fgDomTreePreOrder[0] == 0); // Unused first element + noway_assert(fgDomTreePostOrder[0] == 0); // Unused first element #ifdef DEBUG - if (verbose) + if (0 && verbose) { printf("\nAfter traversing the dominance tree:\n"); printf("PreOrder:\n"); @@ -2738,8 +2743,6 @@ void Compiler::fgBuildDomTree() { printf("BB%02u : %02u\n", i, fgDomTreePostOrder[i]); } - assert(fgDomTreePreOrder[0] == 0); // Unused first element - assert(fgDomTreePostOrder[0] == 0); // Unused first element } #endif // DEBUG } @@ -4827,7 +4830,7 @@ ADDR_TAKEN: ti = lvaTable[varNum].lvVerTypeInfo; } - if (lvaTable[varNum].TypeGet() != TYP_STRUCT && // We will put structs in the stack anyway + if (!varTypeIsStruct(&lvaTable[varNum]) && // We will put structs in the stack anyway // And changing the addrTaken of a local // requires an extra pass in the morpher // so we won't apply this optimization @@ -4842,11 +4845,9 @@ ADDR_TAKEN: // at all { // We can skip the addrtaken, as next IL instruction consumes - // the address. For debuggable code we mark this bit so we - // can have asserts in the rest of the jit + // the address. #ifdef DEBUG noway_assert(varNum < lvaTableCnt); - lvaTable[varNum].lvSafeAddrTaken = 1; #endif } else @@ -4866,7 +4867,7 @@ ADDR_TAKEN: if (pSm && ti.IsValueClass() && - varType != TYP_STRUCT) + !varTypeIsStruct(varType)) { #ifdef DEBUG if (verbose) @@ -5421,7 +5422,11 @@ DECODE_OPCODE: { bool isCallPopAndRet = false; - if (!impIsTailCallILPattern(tailCall, opcode, codeAddr+sz, codeEndp, &isCallPopAndRet)) + // impIsTailCallILPattern uses isRecursive flag to determine whether ret in a fallthrough block is + // allowed. We don't know at this point whether the call is recursive so we conservatively pass false. + // This will only affect explicit tail calls when IL verification is not needed for the method. 
+ bool isRecursive = false; + if (!impIsTailCallILPattern(tailCall, opcode, codeAddr+sz, codeEndp, isRecursive, &isCallPopAndRet)) { #ifdef _TARGET_AMD64_ BADCODE3("tail call not followed by ret or pop+ret", @@ -6721,9 +6726,9 @@ bool Compiler::fgIsThrow(GenTreePtr tree) (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VERIFICATION)) || (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL) ) || (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) || -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) || -#endif +#endif // COR_JIT_EE_VERSION (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROW) ) || (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RETHROW) ) ) { @@ -8158,7 +8163,7 @@ void Compiler::fgAddInternal() if (genReturnBB && ((info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT) || (info.compRetNativeType == TYP_STRUCT && info.compRetBuffArg == BAD_VAR_NUM))) #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - if (genReturnBB && (info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT)) + if (genReturnBB && (info.compRetType != TYP_VOID) && !varTypeIsStruct(info.compRetNativeType)) #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { genReturnLocal = lvaGrabTemp(true DEBUGARG("Single return block return value")); @@ -8504,7 +8509,7 @@ void Compiler::fgAddInternal() } else { - noway_assert(info.compRetType == TYP_VOID || info.compRetType == TYP_STRUCT); + noway_assert(info.compRetType == TYP_VOID || varTypeIsStruct(info.compRetType)); // return void tree = new (this, GT_RETURN) GenTreeOp(GT_RETURN, TYP_VOID ); } @@ -8946,7 +8951,7 @@ void Compiler::fgComputeFPlvls(GenTreePtr tree) // This is a special case to handle the following // optimization: conv.i4(round.d(d)) -> round.i(d) - if (oper==GT_MATH && tree->gtMath.gtMathFN==CORINFO_INTRINSIC_Round && + if (oper== GT_INTRINSIC && tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet()==TYP_INT) { codeGen->genFPstkLevel--; @@ -14149,10 +14154,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) if (bDest->bbJumpDest != bJump->bbNext) return false; - // 'bJump' must be in the same try region as the condition, since we're going to insert - // a duplicated condition in 'bJump', and the condition might include exception throwing code. - if (!BasicBlock::sameTryRegion(bJump, bDest)) - return false; + // 'bJump' must be in the same try region as the condition, since we're going to insert + // a duplicated condition in 'bJump', and the condition might include exception throwing code. 
+ if (!BasicBlock::sameTryRegion(bJump, bDest)) + return false; // do not jump into another try region BasicBlock* bDestNext = bDest->bbNext; @@ -17558,10 +17563,10 @@ unsigned Compiler::acdHelper(SpecialCodeKind codeKind) switch (codeKind) { case SCK_RNGCHK_FAIL: return CORINFO_HELP_RNGCHKFAIL; -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 case SCK_ARG_EXCPN: return CORINFO_HELP_THROW_ARGUMENTEXCEPTION; case SCK_ARG_RNG_EXCPN: return CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION; -#endif //!RYUJIT_CTPBUILD +#endif //COR_JIT_EE_VERSION case SCK_DIV_BY_ZERO: return CORINFO_HELP_THROWDIVZERO; case SCK_ARITH_EXCPN: return CORINFO_HELP_OVERFLOW; default: assert(!"Bad codeKind"); return 0; @@ -17675,10 +17680,10 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, case SCK_PAUSE_EXEC: msg = " for PAUSE_EXEC"; break; case SCK_DIV_BY_ZERO: msg = " for DIV_BY_ZERO"; break; case SCK_OVERFLOW: msg = " for OVERFLOW"; break; -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 case SCK_ARG_EXCPN: msg = " for ARG_EXCPN"; break; case SCK_ARG_RNG_EXCPN: msg = " for ARG_RNG_EXCPN"; break; -#endif //!RYUJIT_CTPBUILD +#endif //COR_JIT_EE_VERSION default: msg = " for ??"; break; } @@ -17721,13 +17726,13 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, noway_assert(SCK_OVERFLOW == SCK_ARITH_EXCPN); break; -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 case SCK_ARG_EXCPN: helper = CORINFO_HELP_THROW_ARGUMENTEXCEPTION; break; case SCK_ARG_RNG_EXCPN: helper = CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION; break; -#endif // !RYUJIT_CTPBUILD +#endif // COR_JIT_EE_VERSION // case SCK_PAUSE_EXEC: // noway_assert(!"add code to pause exec"); @@ -18265,6 +18270,7 @@ void Compiler::fgSetBlockOrder() for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { +#if FEATURE_FASTTAILCALL #ifndef JIT32_GCENCODER if (block->endsWithTailCallOrJmp(this, true) && !(block->bbFlags & BBF_GC_SAFE_POINT) && @@ -18286,6 +18292,7 @@ void Compiler::fgSetBlockOrder() genInterruptible = true; } #endif // !JIT32_GCENCODER +#endif // FEATURE_FASTTAILCALL fgSetBlockOrder(block); } @@ -18932,7 +18939,7 @@ unsigned Compiler::fgGetCodeEstimate(BasicBlock* block) return costSz; } -#if XML_FLOWGRAPHS +#if DUMP_FLOWGRAPHS struct escapeMapping_t { @@ -18953,7 +18960,7 @@ static escapeMapping_t s_EscapeFileMapping[] = {0, 0} }; -static escapeMapping_t s_EscapeXmlMapping[] = +static escapeMapping_t s_EscapeMapping[] = { {'<', "<"}, {'>', ">"}, @@ -19054,9 +19061,19 @@ static void fprintfDouble(FILE* fgxFile, double value) } } -/*****************************************************************************/ +//------------------------------------------------------------------------ +// fgOpenFlowGraphFile: Open a file to dump either the xml or dot format flow graph +// +// Arguments: +// wbDontClose - A boolean out argument that indicates whether the caller should close the file +// phase - A phase identifier to indicate which phase is associated with the dump +// type - A (wide) string indicating the type of dump, "dot" or "xml" +// +// Return Value: +// Opens a file to which a flowgraph can be dumped, whose name is based on the current +// config vales. 
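Editorial note (not part of the diff): the new COMPLUS_JitDumpFgPhase handling in fgOpenFlowGraphFile (next hunk) selects which phases get a flowgraph dump. A standalone restatement of that filter, with an illustrative function name; wcsstr usage mirrors the JIT code.

```cpp
#include <cwchar>

// Returns true if a flowgraph dump should be produced after the given phase.
//   phasePattern == nullptr : dump only after the "determine first cold block" phase.
//   phasePattern starts '*' : dump after every phase.
//   otherwise               : dump only if the phase's short name appears in the pattern.
bool ShouldDumpAfterPhase(const wchar_t* phasePattern,
                          const wchar_t* phaseShortName,
                          bool isDetermineFirstColdBlockPhase)
{
    if (phasePattern == nullptr)
    {
        return isDetermineFirstColdBlockPhase;
    }
    if (phasePattern[0] == L'*')
    {
        return true;
    }
    return std::wcsstr(phasePattern, phaseShortName) != nullptr;
}
```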
-FILE* Compiler::fgOpenXmlFlowGraphFile(bool* wbDontClose) +FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type) { FILE* fgxFile; LPCWSTR pattern = NULL; @@ -19095,6 +19112,24 @@ FILE* Compiler::fgOpenXmlFlowGraphFile(bool* wbDontClose) if (wcslen(pattern) == 0) return NULL; + static ConfigString sJitDumpFgPhase; + LPCWSTR phasePattern = sJitDumpFgPhase.val(CLRConfig::INTERNAL_JitDumpFgPhase); + LPCWSTR phaseName = PhaseShortNames[phase]; + if (phasePattern == 0) + { + if (phase != PHASE_DETERMINE_FIRST_COLD_BLOCK) + { + return nullptr; + } + } + else if (*phasePattern != W('*')) + { + if (wcsstr(phasePattern, phaseName) == nullptr) + { + return nullptr; + } + } + if (*pattern != W('*')) { bool hasColon = (wcschr(pattern, W(':')) != 0); @@ -19222,7 +19257,7 @@ FILE* Compiler::fgOpenXmlFlowGraphFile(bool* wbDontClose) ONE_FILE_PER_METHOD:; escapedString = fgProcessEscapes(info.compFullName, s_EscapeFileMapping); - size_t wCharCount = strlen(escapedString) + strlen("~999") + strlen(".fgx") + 1; + size_t wCharCount = strlen(escapedString) + wcslen(phaseName) + 1 + strlen("~999") + wcslen(type) + 1; if (pathname != NULL) { wCharCount += wcslen(pathname) + 1; @@ -19230,11 +19265,11 @@ ONE_FILE_PER_METHOD:; filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR)); if (pathname != NULL) { - swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S.fgx"), pathname, escapedString); + swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S-%s.%s"), pathname, escapedString, phaseName, type); } else { - swprintf_s((LPWSTR)filename, wCharCount, W("%S.fgx"), escapedString); + swprintf_s((LPWSTR)filename, wCharCount, W("%S.%s"), escapedString, type); } fgxFile = _wfopen(filename, W("r")); // Check if this file already exists if (fgxFile != NULL) @@ -19251,11 +19286,11 @@ ONE_FILE_PER_METHOD:; fclose(fgxFile); if (pathname != NULL) { - swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S~%d.fgx"), pathname, escapedString, i); + swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S~%d.%s"), pathname, escapedString, i, type); } else { - swprintf_s((LPWSTR)filename, wCharCount, W("%S~%d.fgx"), escapedString, i); + swprintf_s((LPWSTR)filename, wCharCount, W("%S~%d.%s"), escapedString, i, type); } fgxFile = _wfopen(filename, W("r")); // Check if this file exists if (fgxFile == NULL) @@ -19284,7 +19319,7 @@ ONE_FILE_PER_METHOD:; else { LPCWSTR origFilename = filename; - size_t wCharCount = wcslen(origFilename) + strlen(".fgx") + 1; + size_t wCharCount = wcslen(origFilename) + wcslen(type) + 2; if (pathname != NULL) { wCharCount += wcslen(pathname) + 1; @@ -19292,11 +19327,11 @@ ONE_FILE_PER_METHOD:; filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR)); if (pathname != NULL) { - swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%s.fgx"), pathname, origFilename); + swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%s.%s"), pathname, origFilename, type); } else { - swprintf_s((LPWSTR)filename, wCharCount, W("%s.fgx"), origFilename); + swprintf_s((LPWSTR)filename, wCharCount, W("%s.%s"), origFilename, type); } fgxFile = _wfopen(filename, W("a+")); *wbDontClose = false; @@ -19305,19 +19340,54 @@ ONE_FILE_PER_METHOD:; return fgxFile; } -/*****************************************************************************/ - -bool Compiler::fgDumpXmlFlowGraph() +//------------------------------------------------------------------------ +// fgDumpFlowGraph: Dump the xml or dot format flow graph, if enabled for this phase. 
+// +// Arguments: +// phase - A phase identifier to indicate which phase is associated with the dump, +// i.e. which phase has just completed. +// +// Return Value: +// True iff a flowgraph has been dumped. +// +// Notes: +// The xml dumps are the historical mechanism for dumping the flowgraph. +// The dot format can be viewed by: +// - Graphviz (http://www.graphviz.org/) +// - The command "C:\Program Files (x86)\Graphviz2.38\bin\dot.exe" -Tsvg -oFoo.svg -Kdot Foo.dot +// will produce a Foo.svg file that can be opened with any svg-capable browser (e.g. IE). +// - http://rise4fun.com/Agl/ +// - Cut and paste the graph from your .dot file, replacing the digraph on the page, and then click the play button. +// - It will show a rotating '/' and then render the graph in the browser. +// MSAGL has also been open-sourced to https://github.com/Microsoft/automatic-graph-layout.git. +// +// Here are the config values that control it: +// COMPLUS_JitDumpFg A string (ala the COMPLUS_JitDump string) indicating what methods to dump flowgraphs for. +// COMPLUS_JitDumpFgDir A path to a directory into which the flowgraphs will be dumped. +// COMPLUS_JitDumpFgFile The filename to use. The default is "default.[xml|dot]". +// Note that the new graphs will be appended to this file if it already exists. +// COMPLUS_JitDumpFgPhase Phase(s) after which to dump the flowgraph. +// Set to the short name of a phase to see the flowgraph after that phase. +// Leave unset to dump after COLD-BLK (determine first cold block) or set to * for all phases. +// COMPLUS_JitDumpFgDot Set to non-zero to emit Dot instead of Xml Flowgraph dump. (Default is xml format.) + +bool Compiler::fgDumpFlowGraph(Phases phase) { bool result = false; bool dontClose = false; - FILE* fgxFile = fgOpenXmlFlowGraphFile(&dontClose); + static ConfigDWORD fJitDumpFgDot; + bool createDotFile = false; + if (fJitDumpFgDot.val(CLRConfig::INTERNAL_JitDumpFgDot)) + { + createDotFile = true; + } + + FILE* fgxFile = fgOpenFlowGraphFile(&dontClose, phase, createDotFile ? 
W("dot") : W("fgx")); if (fgxFile == NULL) { return false; } - bool validWeights = fgHaveValidEdgeWeights; unsigned calledCount = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT; double weightDivisor = (double) (calledCount * BB_UNITY_WEIGHT); @@ -19337,52 +19407,60 @@ bool Compiler::fgDumpXmlFlowGraph() regionString="JIT"; } - fprintf(fgxFile, "<method"); + if (createDotFile) + { + fprintf(fgxFile, "digraph %s\n{\n", info.compMethodName); + fprintf(fgxFile, "/* Method %d, after phase %s */", Compiler::jitTotalMethodCompiled, PhaseNames[phase]); + } + else + { + fprintf(fgxFile, "<method"); - escapedString = fgProcessEscapes(info.compFullName, s_EscapeXmlMapping); - fprintf(fgxFile, "\n name=\"%s\"", escapedString); + escapedString = fgProcessEscapes(info.compFullName, s_EscapeMapping); + fprintf(fgxFile, "\n name=\"%s\"", escapedString); - escapedString = fgProcessEscapes(info.compClassName, s_EscapeXmlMapping); - fprintf(fgxFile, "\n className=\"%s\"", escapedString); + escapedString = fgProcessEscapes(info.compClassName, s_EscapeMapping); + fprintf(fgxFile, "\n className=\"%s\"", escapedString); - escapedString = fgProcessEscapes(info.compMethodName, s_EscapeXmlMapping); - fprintf(fgxFile, "\n methodName=\"%s\"", escapedString); - fprintf(fgxFile, "\n ngenRegion=\"%s\"", regionString); + escapedString = fgProcessEscapes(info.compMethodName, s_EscapeMapping); + fprintf(fgxFile, "\n methodName=\"%s\"", escapedString); + fprintf(fgxFile, "\n ngenRegion=\"%s\"", regionString); - fprintf(fgxFile, "\n bytesOfIL=\"%d\"", info.compILCodeSize); - fprintf(fgxFile, "\n localVarCount=\"%d\"", lvaCount); + fprintf(fgxFile, "\n bytesOfIL=\"%d\"", info.compILCodeSize); + fprintf(fgxFile, "\n localVarCount=\"%d\"", lvaCount); - if (fgHaveProfileData()) - { - fprintf(fgxFile, "\n calledCount=\"%d\"", calledCount); - fprintf(fgxFile, "\n profileData=\"true\""); - } - if (compHndBBtabCount > 0) - { - fprintf(fgxFile, "\n hasEHRegions=\"true\""); - } - if (fgHasLoops) - { - fprintf(fgxFile, "\n hasLoops=\"true\""); - } - if (validWeights) - { - fprintf(fgxFile, "\n validEdgeWeights=\"true\""); - if (!fgSlopUsedInEdgeWeights && !fgRangeUsedInEdgeWeights) + if (fgHaveProfileData()) { - fprintf(fgxFile, "\n exactEdgeWeights=\"true\""); + fprintf(fgxFile, "\n calledCount=\"%d\"", calledCount); + fprintf(fgxFile, "\n profileData=\"true\""); + } + if (compHndBBtabCount > 0) + { + fprintf(fgxFile, "\n hasEHRegions=\"true\""); + } + if (fgHasLoops) + { + fprintf(fgxFile, "\n hasLoops=\"true\""); + } + if (validWeights) + { + fprintf(fgxFile, "\n validEdgeWeights=\"true\""); + if (!fgSlopUsedInEdgeWeights && !fgRangeUsedInEdgeWeights) + { + fprintf(fgxFile, "\n exactEdgeWeights=\"true\""); + } + } + if (fgFirstColdBlock != NULL) + { + fprintf(fgxFile, "\n firstColdBlock=\"%d\"", fgFirstColdBlock->bbNum); } - } - if (fgFirstColdBlock != NULL) - { - fprintf(fgxFile, "\n firstColdBlock=\"%d\"", fgFirstColdBlock->bbNum); - } - fprintf(fgxFile, ">"); + fprintf(fgxFile, ">"); - fprintf(fgxFile, "\n <blocks"); - fprintf(fgxFile, "\n blockCount=\"%d\"", fgBBcount); - fprintf(fgxFile, ">"); + fprintf(fgxFile, "\n <blocks"); + fprintf(fgxFile, "\n blockCount=\"%d\"", fgBBcount); + fprintf(fgxFile, ">"); + } static const char* kindImage[] = { "EHFINALLYRET", "EHFILTERRET", "EHCATCHRET", "THROW", "RETURN", "NONE", "ALWAYS", "LEAVE", @@ -19394,47 +19472,73 @@ bool Compiler::fgDumpXmlFlowGraph() block != NULL; block = block->bbNext, blockOrdinal++) { - fprintf(fgxFile,"\n <block"); - fprintf(fgxFile,"\n id=\"%d\"", 
block->bbNum); - fprintf(fgxFile,"\n ordinal=\"%d\"", blockOrdinal); - fprintf(fgxFile,"\n jumpKind=\"%s\"", kindImage[block->bbJumpKind]); - if (block->hasTryIndex()) + if (createDotFile) { - fprintf(fgxFile,"\n inTry=\"%s\"", "true"); - } - if (block->hasHndIndex()) - { - fprintf(fgxFile,"\n inHandler=\"%s\"", "true"); - } - if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) && - ((block->bbFlags & BBF_COLD) == 0) ) - { - fprintf(fgxFile,"\n hot=\"true\""); - } - if (block->bbFlags & (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY)) - { - fprintf(fgxFile,"\n callsNew=\"true\""); + // Add constraint edges to try to keep nodes ordered. + // It seems to work best if these edges are all created first. + switch(block->bbJumpKind) + { + case BBJ_COND: + case BBJ_NONE: + assert(block->bbNext != nullptr); + fprintf(fgxFile, " BB%02u -> BB%02u\n", block->bbNum, block->bbNext->bbNum); + break; + default: + // These may or may not have an edge to the next block. + // Add a transparent edge to keep nodes ordered. + if (block->bbNext != nullptr) + { + fprintf(fgxFile, " BB%02u -> BB%02u [arrowtail=none,color=transparent]\n", block->bbNum, block->bbNext->bbNum); + } + } } - if (block->bbFlags & BBF_LOOP_HEAD) + else { - fprintf(fgxFile,"\n loopHead=\"true\""); + fprintf(fgxFile,"\n <block"); + fprintf(fgxFile,"\n id=\"%d\"", block->bbNum); + fprintf(fgxFile,"\n ordinal=\"%d\"", blockOrdinal); + fprintf(fgxFile,"\n jumpKind=\"%s\"", kindImage[block->bbJumpKind]); + if (block->hasTryIndex()) + { + fprintf(fgxFile,"\n inTry=\"%s\"", "true"); + } + if (block->hasHndIndex()) + { + fprintf(fgxFile,"\n inHandler=\"%s\"", "true"); + } + if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) && + ((block->bbFlags & BBF_COLD) == 0) ) + { + fprintf(fgxFile,"\n hot=\"true\""); + } + if (block->bbFlags & (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY)) + { + fprintf(fgxFile,"\n callsNew=\"true\""); + } + if (block->bbFlags & BBF_LOOP_HEAD) + { + fprintf(fgxFile,"\n loopHead=\"true\""); + } + fprintf(fgxFile,"\n weight="); + fprintfDouble(fgxFile, ((double) block->bbWeight) / weightDivisor); + fprintf(fgxFile,"\n codeEstimate=\"%d\"", fgGetCodeEstimate(block)); + fprintf(fgxFile,"\n startOffset=\"%d\"", block->bbCodeOffs); + fprintf(fgxFile,"\n endOffset=\"%d\"", block->bbCodeOffsEnd); + fprintf(fgxFile, ">"); + fprintf(fgxFile,"\n </block>"); } - fprintf(fgxFile,"\n weight="); - fprintfDouble(fgxFile, ((double) block->bbWeight) / weightDivisor); - fprintf(fgxFile,"\n codeEstimate=\"%d\"", fgGetCodeEstimate(block)); - fprintf(fgxFile,"\n startOffset=\"%d\"", block->bbCodeOffs); - fprintf(fgxFile,"\n endOffset=\"%d\"", block->bbCodeOffsEnd); - fprintf(fgxFile, ">"); - fprintf(fgxFile,"\n </block>"); } - fprintf(fgxFile, "\n </blocks>"); - unsigned edgeNum = 1; + if (!createDotFile) + { + fprintf(fgxFile, "\n </blocks>"); - fprintf(fgxFile, "\n <edges"); - fprintf(fgxFile, "\n edgeCount=\"%d\"", fgEdgeCount); - fprintf(fgxFile, ">"); + fprintf(fgxFile, "\n <edges"); + fprintf(fgxFile, "\n edgeCount=\"%d\"", fgEdgeCount); + fprintf(fgxFile, ">"); + } + unsigned edgeNum = 1; BasicBlock* bTarget; for (bTarget = fgFirstBB; bTarget != NULL; bTarget = bTarget->bbNext) { @@ -19461,55 +19565,86 @@ bool Compiler::fgDumpXmlFlowGraph() { sourceWeightDivisor = (double) bSource->bbWeight; } - fprintf(fgxFile,"\n <edge"); - fprintf(fgxFile,"\n id=\"%d\"", edgeNum); - fprintf(fgxFile,"\n source=\"%d\"", bSource->bbNum); - fprintf(fgxFile,"\n target=\"%d\"", bTarget->bbNum); - if (bSource->bbJumpKind == BBJ_SWITCH) + if (createDotFile) { - if (edge->flDupCount >= 
2) + // Don't duplicate the edges we added above. + if ((bSource->bbNum == (bTarget->bbNum - 1)) && + ((bSource->bbJumpKind == BBJ_NONE) || (bSource->bbJumpKind == BBJ_COND))) { - fprintf(fgxFile,"\n switchCases=\"%d\"", edge->flDupCount); + continue; } - if (bSource->bbJumpSwt->getDefault() == bTarget) + fprintf(fgxFile, " BB%02u -> BB%02u", bSource->bbNum, bTarget->bbNum); + if ((bSource->bbNum > bTarget->bbNum)) { - fprintf(fgxFile,"\n switchDefault=\"true\""); + fprintf(fgxFile, "[arrowhead=normal,arrowtail=none,color=green]\n"); + } + else + { + fprintf(fgxFile, "\n"); } } - if (validWeights) + else { - unsigned edgeWeight = (edge->flEdgeWeightMin + edge->flEdgeWeightMax) / 2; - fprintf(fgxFile,"\n weight="); - fprintfDouble(fgxFile, ((double) edgeWeight) / weightDivisor); - - if (edge->flEdgeWeightMin != edge->flEdgeWeightMax) + fprintf(fgxFile,"\n <edge"); + fprintf(fgxFile,"\n id=\"%d\"", edgeNum); + fprintf(fgxFile,"\n source=\"%d\"", bSource->bbNum); + fprintf(fgxFile,"\n target=\"%d\"", bTarget->bbNum); + if (bSource->bbJumpKind == BBJ_SWITCH) { - fprintf(fgxFile,"\n minWeight="); - fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMin) / weightDivisor); - fprintf(fgxFile,"\n maxWeight="); - fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMax) / weightDivisor); + if (edge->flDupCount >= 2) + { + fprintf(fgxFile,"\n switchCases=\"%d\"", edge->flDupCount); + } + if (bSource->bbJumpSwt->getDefault() == bTarget) + { + fprintf(fgxFile,"\n switchDefault=\"true\""); + } } - - if (edgeWeight > 0) + if (validWeights) { - if (edgeWeight < bSource->bbWeight) + unsigned edgeWeight = (edge->flEdgeWeightMin + edge->flEdgeWeightMax) / 2; + fprintf(fgxFile,"\n weight="); + fprintfDouble(fgxFile, ((double) edgeWeight) / weightDivisor); + + if (edge->flEdgeWeightMin != edge->flEdgeWeightMax) { - fprintf(fgxFile,"\n out="); - fprintfDouble(fgxFile, ((double) edgeWeight) / sourceWeightDivisor ); + fprintf(fgxFile,"\n minWeight="); + fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMin) / weightDivisor); + fprintf(fgxFile,"\n maxWeight="); + fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMax) / weightDivisor); } - if (edgeWeight < bTarget->bbWeight) + + if (edgeWeight > 0) { - fprintf(fgxFile,"\n in="); - fprintfDouble(fgxFile, ((double) edgeWeight) / targetWeightDivisor); + if (edgeWeight < bSource->bbWeight) + { + fprintf(fgxFile,"\n out="); + fprintfDouble(fgxFile, ((double) edgeWeight) / sourceWeightDivisor ); + } + if (edgeWeight < bTarget->bbWeight) + { + fprintf(fgxFile,"\n in="); + fprintfDouble(fgxFile, ((double) edgeWeight) / targetWeightDivisor); + } } } } - fprintf(fgxFile, ">"); - fprintf(fgxFile,"\n </edge>"); + if (!createDotFile) + { + fprintf(fgxFile, ">"); + fprintf(fgxFile,"\n </edge>"); + } } } - fprintf(fgxFile, "\n </edges>"); - fprintf(fgxFile, "\n</method>\n"); + if (createDotFile) + { + fprintf(fgxFile, "}\n"); + } + else + { + fprintf(fgxFile, "\n </edges>"); + fprintf(fgxFile, "\n</method>\n"); + } if (dontClose) { @@ -19524,7 +19659,7 @@ bool Compiler::fgDumpXmlFlowGraph() return result; } -#endif // XML_FLOWGRAPHS +#endif // DUMP_FLOWGRAPHS /*****************************************************************************/ #ifdef DEBUG @@ -20062,23 +20197,17 @@ void Compiler::fgDumpTrees(BasicBlock* firstBlock, Compiler::fgWalkResult Compiler::fgStress64RsltMulCB(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; + Compiler* pComp = data->compiler; + if (tree->gtOper != GT_MUL || tree->gtType != TYP_INT || (tree->gtOverflow())) return WALK_CONTINUE; - 
GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtOp.gtOp2; - - Compiler* pComp = data->compiler; - - op1 = pComp->gtNewCastNode(TYP_LONG, op1, TYP_LONG); - op2 = pComp->gtNewCastNode(TYP_LONG, op2, TYP_LONG); - - GenTreePtr newMulNode = pComp->gtNewLargeOperNode(GT_MUL, TYP_LONG, op1, op2); - newMulNode = pComp->gtNewOperNode(GT_NOP, TYP_LONG, newMulNode); // To ensure optNarrowTree() doesn't fold back to the original tree - - tree->ChangeOper(GT_CAST); - tree->gtCast.CastOp() = newMulNode; - tree->CastToType() = TYP_INT; + // To ensure optNarrowTree() doesn't fold back to the original tree. + tree->gtOp.gtOp1 = pComp->gtNewOperNode(GT_NOP, TYP_LONG, tree->gtOp.gtOp1); + tree->gtOp.gtOp1 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp1, TYP_LONG); + tree->gtOp.gtOp2 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp2, TYP_LONG); + tree->gtType = TYP_LONG; + *pTree = pComp->gtNewCastNode(TYP_INT, tree, TYP_INT); return WALK_SKIP_SUBTREES; } @@ -22408,7 +22537,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) CORINFO_CLASS_HANDLE structType = DUMMY_INIT(0); - if (lclVarInfo[argNum].lclTypeInfo == TYP_STRUCT) + if (varTypeIsStruct(lclVarInfo[argNum].lclTypeInfo)) { if (inlArgInfo[argNum].argNode->gtOper == GT_LDOBJ) { @@ -22559,7 +22688,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) var_types lclTyp = (var_types)lvaTable[tmpNum].lvType; noway_assert(lclTyp == lclVarInfo[lclNum + inlineInfo->argCnt].lclTypeInfo); - if (lclTyp != TYP_STRUCT) + if (!varTypeIsStruct(lclTyp)) { // Unsafe value cls check is not needed here since in-linee compiler instance would have // iterated over locals and marked accordingly. @@ -22576,7 +22705,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) CORINFO_CLASS_HANDLE structType = lclVarInfo[lclNum + inlineInfo->argCnt].lclVerTypeInfo.GetClassHandle(); tree = gtNewOperNode(GT_ADDR, TYP_BYREF, - gtNewLclvNode(tmpNum, TYP_STRUCT)); + gtNewLclvNode(tmpNum, lclTyp)); tree = gtNewBlkOpNode(GT_INITBLK, tree, // Dest @@ -22672,7 +22801,7 @@ Compiler::fgWalkResult Compiler::fgChkQmarkCB(GenTreePtr* pTree, void Compiler::fgLclFldAssign(unsigned lclNum) { - assert(var_types(lvaTable[lclNum].lvType) == TYP_STRUCT); + assert(varTypeIsStruct(lvaTable[lclNum].lvType)); if (lvaTable[lclNum].lvPromoted && lvaTable[lclNum].lvFieldCnt > 1) { lvaSetVarDoNotEnregister(lclNum DEBUG_ARG(DNER_LocalField)); diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp index 2c9e643fc1..599d265c39 100644 --- a/src/jit/gcencode.cpp +++ b/src/jit/gcencode.cpp @@ -1859,7 +1859,10 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, } } - if (varDsc->lvType == TYP_STRUCT && varDsc->lvOnFrame) + // A struct will have gcSlots only if it is at least TARGET_POINTER_SIZE. 
+ if (varDsc->lvType == TYP_STRUCT && + varDsc->lvOnFrame && + (varDsc->lvExactSize >= TARGET_POINTER_SIZE)) { unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*); BYTE * gcPtrs = compiler->lvaGetGcLayout(varNum); @@ -3236,7 +3239,7 @@ unsigned GCInfo::gcInfoBlockHdrDump(const BYTE* table, { GCDump gcDump; - gcDump.gcPrintf = logf; // use my printf (which logs to VM) + gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM) printf("Method info block:\n"); return gcDump.DumpInfoHdr(table, header, methodSize, verifyGCTables); @@ -3251,7 +3254,7 @@ unsigned GCInfo::gcDumpPtrTable(const BYTE* table, printf("Pointer table:\n"); GCDump gcDump; - gcDump.gcPrintf = logf; // use my printf (which logs to VM) + gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM) return gcDump.DumpGCTable(table, header, methodSize, verifyGCTables); } @@ -3267,7 +3270,7 @@ void GCInfo::gcFindPtrsInFrame(const void* infoBlock, unsigned offs) { GCDump gcDump; - gcDump.gcPrintf = logf; // use my printf (which logs to VM) + gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM) gcDump.DumpPtrsInFrame((const BYTE*)infoBlock, (const BYTE*)codeBlock, offs, verifyGCTables); } @@ -3807,7 +3810,9 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder, } } - if (varDsc->lvType == TYP_STRUCT && varDsc->lvOnFrame) + // If this is a TYP_STRUCT, handle its GC pointers. + // Note that the enregisterable struct types cannot have GC pointers in them. + if ((varDsc->lvType == TYP_STRUCT) && varDsc->lvOnFrame && (varDsc->lvExactSize >= TARGET_POINTER_SIZE)) { unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*); BYTE * gcPtrs = compiler->lvaGetGcLayout(varNum); diff --git a/src/jit/gcinfo.cpp b/src/jit/gcinfo.cpp index 9fe0f75d84..6dd0824c0c 100644 --- a/src/jit/gcinfo.cpp +++ b/src/jit/gcinfo.cpp @@ -489,7 +489,9 @@ void GCInfo::gcCountForHeader(UNALIGNED unsigned int * untrackedC count++; } - else if (varDsc->lvType == TYP_STRUCT && varDsc->lvOnFrame) + else if (varDsc->lvType == TYP_STRUCT && + varDsc->lvOnFrame && + (varDsc->lvExactSize >= TARGET_POINTER_SIZE)) { unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*); BYTE * gcPtrs = compiler->lvaGetGcLayout(varNum); @@ -791,7 +793,7 @@ GCInfo::gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc *v if (srcMask != RBM_NONE) { - regSet->rsMaskVars &= ~(srcMask); + regSet->RemoveMaskVars(srcMask); if (isGCRef) { assert((gcRegByrefSetCur & srcMask) == 0); @@ -813,7 +815,7 @@ GCInfo::gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc *v } if (dstMask != RBM_NONE) { - regSet->rsMaskVars |= dstMask; + regSet->AddMaskVars(dstMask); // If the source is a reg, then the gc sets have been set appropriately // Otherwise, we have to determine whether to set them if (srcMask == RBM_NONE) diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 6d02be469d..6122e831ab 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -194,6 +194,19 @@ const char * GenTree::NodeName(genTreeOps op) return nodeNames[op]; } +static const char * opNames[] = +{ + #define GTNODE(en,sn,cm,ok) #en, + #include "gtlist.h" +}; + +const char * GenTree::OpName(genTreeOps op) +{ + assert((unsigned)op < sizeof(opNames)/sizeof(opNames[0])); + + return opNames[op]; +} + #endif /***************************************************************************** @@ -254,10 +267,7 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[GT_QMARK ] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_LEA ] = TREE_NODE_SZ_LARGE; 
GenTree::s_gtNodeSizes[GT_COPYOBJ ] = TREE_NODE_SZ_LARGE; - -#if INLINE_MATH - GenTree::s_gtNodeSizes[GT_MATH ] = TREE_NODE_SZ_LARGE; -#endif + GenTree::s_gtNodeSizes[GT_INTRINSIC ] = TREE_NODE_SZ_LARGE; #if USE_HELPERS_FOR_INT_DIV GenTree::s_gtNodeSizes[GT_DIV ] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_UDIV ] = TREE_NODE_SZ_LARGE; @@ -307,9 +317,7 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeCmpXchg) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeFptrVal) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeQmark) <= TREE_NODE_SZ_LARGE); // *** large node -#if INLINE_MATH - static_assert_no_msg(sizeof(GenTreeMath) <= TREE_NODE_SZ_LARGE); // *** large node -#endif // INLINE_MATH + static_assert_no_msg(sizeof(GenTreeIntrinsic) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeIndex) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeArrLen) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeBoundsChk) <= TREE_NODE_SZ_LARGE); // *** large node @@ -868,9 +876,13 @@ Compiler::fgWalkTreeRec(GenTreePtr *pTree, fgWalkData *fgWalkData) } } + // If we're skipping subtrees, we're done. + if (result == WALK_SKIP_SUBTREES) + goto DONE; + /* Is this a constant or leaf node? */ - if (kind & (GTK_CONST|GTK_LEAF)) + if ((kind & (GTK_CONST|GTK_LEAF)) != 0) goto DONE; /* Is it a 'simple' unary/binary operator? */ @@ -1261,8 +1273,8 @@ AGAIN: // these should be included in the hash code. switch (oper) { - case GT_MATH: - if (op1->gtMath.gtMathFN != op2->gtMath.gtMathFN) return false; + case GT_INTRINSIC: + if (op1->gtIntrinsic.gtIntrinsicId != op2->gtIntrinsic.gtIntrinsicId) return false; break; case GT_LEA: if (op1->gtAddrMode.gtScale != op2->gtAddrMode.gtScale) return false; @@ -1777,8 +1789,8 @@ AGAIN: // these should be included in the hash code. switch (oper) { - case GT_MATH: - hash += tree->gtMath.gtMathFN; + case GT_INTRINSIC: + hash += tree->gtIntrinsic.gtIntrinsicId; break; case GT_LEA: hash += (tree->gtAddrMode.gtOffset << 3) + tree->gtAddrMode.gtScale; @@ -2699,8 +2711,8 @@ unsigned Compiler::gtSetEvalOrder(GenTree * tree) #elif defined _TARGET_XARCH_ case GT_CNS_LNG: - costSz = 8; - costEx = 2; + costSz = 10; + costEx = 3; goto COMMON_CNS; case GT_CNS_STR: @@ -2724,8 +2736,8 @@ unsigned Compiler::gtSetEvalOrder(GenTree * tree) #if defined(_TARGET_AMD64_) else if (iconNeedsReloc || ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0)) { - costSz = 8; - costEx = 2; + costSz = 10; + costEx = 3; } #endif // _TARGET_AMD64_ else @@ -2805,6 +2817,17 @@ COMMON_CNS: costSz += 1; } } +#if defined(_TARGET_AMD64_) + // increase costSz for floating point locals + if (isflt) + { + costSz += 1; + if (!gtIsLikelyRegVar(tree)) + { + costSz += 1; + } + } +#endif #if CPU_LONG_USES_REGPAIR if (varTypeIsLong(tree->TypeGet())) { @@ -2987,21 +3010,62 @@ COMMON_CNS: costSz = 0; break; - -#if INLINE_MATH - case GT_MATH: - if (tree->gtMath.gtMathFN==CORINFO_INTRINSIC_Round && - tree->TypeGet()==TYP_INT) + case GT_INTRINSIC: + // GT_INTRINSIC intrinsics Sin, Cos, Sqrt, Abs ... have higher costs. + // TODO: tune these costs target specific as some of these are + // target intrinsics and would cost less to generate code. 
+ switch (tree->gtIntrinsic.gtIntrinsicId) { - // This is a special case to handle the following - // optimization: conv.i4(round.d(d)) -> round.i(d) - + default: + assert(!"missing case for gtIntrinsicId"); + costEx = 12; + costSz = 12; + break; + + case CORINFO_INTRINSIC_Sin: + case CORINFO_INTRINSIC_Cos: + case CORINFO_INTRINSIC_Sqrt: + case CORINFO_INTRINSIC_Cosh: + case CORINFO_INTRINSIC_Sinh: + case CORINFO_INTRINSIC_Tan: + case CORINFO_INTRINSIC_Tanh: + case CORINFO_INTRINSIC_Asin: + case CORINFO_INTRINSIC_Acos: + case CORINFO_INTRINSIC_Atan: + case CORINFO_INTRINSIC_Atan2: + case CORINFO_INTRINSIC_Log10: + case CORINFO_INTRINSIC_Pow: + case CORINFO_INTRINSIC_Exp: + case CORINFO_INTRINSIC_Ceiling: + case CORINFO_INTRINSIC_Floor: + case CORINFO_INTRINSIC_Object_GetType: + // Giving intrinsics a large fixed exectuion cost is because we'd like to CSE + // them, even if they are implemented by calls. This is different from modeling + // user calls since we never CSE user calls. + costEx = 36; + costSz = 4; + break; + + case CORINFO_INTRINSIC_Abs: + costEx = 5; + costSz = 15; + break; + + case CORINFO_INTRINSIC_Round: + costEx = 3; + costSz = 4; #if FEATURE_STACK_FP_X87 - codeGen->genDecrementFPstkLevel(); + if (tree->TypeGet() == TYP_INT) + { + // This is a special case to handle the following + // optimization: conv.i4(round.d(d)) -> round.i(d) + codeGen->genDecrementFPstkLevel(); + } #endif // FEATURE_STACK_FP_X87 + break; } - __fallthrough; -#endif // INLINE_MATH + level++; + break; case GT_NOT: case GT_NEG: @@ -3028,6 +3092,14 @@ COMMON_CNS: #endif // FEATURE_STACK_FP_X87 costEx = 0; costSz = 1; + + // If we have a GT_ADDR of an GT_IND we can just copy the costs from indOp1 + if (op1->OperGet() == GT_IND) + { + GenTreePtr indOp1 = op1->gtOp.gtOp1; + costEx = indOp1->gtCostEx; + costSz = indOp1->gtCostSz; + } break; case GT_ARR_LENGTH: @@ -3118,20 +3190,23 @@ COMMON_CNS: op1->gtFlags |= GTF_ADDRMODE_NO_CSE; #ifdef _TARGET_XARCH_ + // addrmodeCount is the count of items that we used to form + // an addressing mode. The maximum value is 4 when we have + // all of these: { adr, idx, cns, mul } + // + unsigned addrmodeCount = 0; if (adr) { costEx += adr->gtCostEx; costSz += adr->gtCostSz; + addrmodeCount++; } if (idx) { costEx += idx->gtCostEx; costSz += idx->gtCostSz; -#ifdef _TARGET_AMD64_ - costEx += 1; // We will need to use a movsxd instruction - costSz += 3; // to sign extend the index to 64-bits. 
-#endif + addrmodeCount++; } if (cns) @@ -3140,6 +3215,66 @@ COMMON_CNS: costSz += 1; else costSz += 4; + addrmodeCount++; + } + if (mul) + { + addrmodeCount++; + } + // When we form a complex addressing mode we can reduced the costs + // associated with the interior GT_ADD and GT_LSH nodes: + // + // GT_ADD -- reduce this interior GT_ADD by (-3,-3) + // / \ -- + // GT_ADD 'cns' -- reduce this interior GT_ADD by (-2,-2) + // / \ -- + // 'adr' GT_LSL -- reduce this interior GT_LSL by (-1,-1) + // / \ -- + // 'idx' 'mul' + // + if (addrmodeCount > 1) + { + // The number of interior GT_ADD and GT_LSL will always be one less than addrmodeCount + // + addrmodeCount--; + + GenTreePtr tmp = op1; + while (addrmodeCount > 0) + { + // decrement the gtCosts for the interior GT_ADD or GT_LSH node by the remaining addrmodeCount + tmp->SetCosts(tmp->gtCostEx - addrmodeCount, tmp->gtCostSz - addrmodeCount); + + addrmodeCount--; + if (addrmodeCount > 0) + { + GenTreePtr tmpOp1 = tmp->gtOp.gtOp1; + GenTreePtr tmpOp2 = tmp->gtGetOp2(); + assert(tmpOp2 != nullptr); + + if ((tmpOp1 != adr) && (tmpOp1->OperGet() == GT_ADD)) + { + tmp = tmpOp1; + } + else if (tmpOp2->OperGet() == GT_LSH) + { + tmp = tmpOp2; + } + else if (tmpOp1->OperGet() == GT_LSH) + { + tmp = tmpOp1; + } + else if (tmpOp2->OperGet() == GT_ADD) + { + tmp = tmpOp2; + } + else + { + // We can very rarely encounter a tree that has a GT_COMMA node + // that is difficult to walk, so we just early out without decrementing. + addrmodeCount = 0; + } + } + } } #elif defined _TARGET_ARM_ if (adr) @@ -3368,13 +3503,14 @@ COMMON_CNS: costSz += 2; } #endif +#ifndef _TARGET_64BIT_ if (varTypeIsLong(op1->TypeGet())) { /* Operations on longs are more expensive */ costEx += 3; costSz += 3; } - +#endif switch (oper) { case GT_MOD: @@ -3399,11 +3535,13 @@ COMMON_CNS: { /* fp division is very expensive to execute */ costEx = 36; // TYP_DOUBLE + costSz += 3; } else { /* integer division is also very expensive */ costEx = 20; + costSz += 2; #if LONG_MATH_REGPARAM if (tree->gtType == TYP_LONG) @@ -3431,12 +3569,14 @@ COMMON_CNS: if (isflt) { /* FP multiplication instructions are more expensive */ - costEx = 5; + costEx += 4; + costSz += 3; } else { /* Integer multiplication instructions are more expensive */ - costEx = 4; + costEx += 3; + costSz += 2; #if LONG_MATH_REGPARAM @@ -3477,7 +3617,6 @@ COMMON_CNS: } break; - case GT_ADD: case GT_SUB: case GT_ASG_ADD: @@ -3486,7 +3625,8 @@ COMMON_CNS: if (isflt) { /* FP instructions are a bit more expensive */ - costEx = 5; + costEx += 4; + costSz += 3; break; } @@ -3498,6 +3638,7 @@ COMMON_CNS: } break; + case GT_COMMA: /* Comma tosses the result of the left operand */ @@ -3726,30 +3867,23 @@ COMMON_CNS: } break; -#if INLINE_MATH - case GT_MATH: + case GT_INTRINSIC: - // We don't use any binary GT_MATH operators at the moment -#if 0 - switch (tree->gtMath.gtMathFN) + switch (tree->gtIntrinsic.gtIntrinsicId) { - case CORINFO_INTRINSIC_Exp: - level += 4; - break; - + case CORINFO_INTRINSIC_Atan2: case CORINFO_INTRINSIC_Pow: - level += 3; + // These math intrinsics are actually implemented by user calls. + // Increase the Sethi 'complexity' by two to reflect the argument + // register requirement. 
+ level += 2; break; default: - assert(!"Unknown binary GT_MATH operator"); + assert(!"Unknown binary GT_INTRINSIC operator"); break; } -#else // 0 - assert(!"Unknown binary GT_MATH operator"); -#endif // !0 break; -#endif // INLINE_MATH default: break; @@ -4188,7 +4322,7 @@ COMMON_CNS: #ifdef FEATURE_SIMD case GT_SIMD_CHK: #endif // FEATURE_SIMD - costEx = 2; // cmp reg,reg and jae throw (not taken) + costEx = 4; // cmp reg,reg and jae throw (not taken) costSz = 7; // jump to cold section level = gtSetEvalOrder(tree->gtBoundsChk.gtArrLen); @@ -4373,7 +4507,6 @@ void GenTree::InsertAfterSelf(GenTree* node, GenTreeStmt* stmt /* = n this->gtNext = node; } -#ifndef LEGACY_BACKEND //------------------------------------------------------------------------ // gtGetChildPointer: If 'parent' is the parent of this node, return the pointer // to the child node so that it can be modified; otherwise, return nullptr. @@ -4493,18 +4626,16 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) GenTreePtr GenTree::gtGetParent(GenTreePtr** parentChildPtrPtr) { // Find the parent node; it must be after this node in the execution order. - GenTreePtr * parentChildPtr = NULL; + GenTreePtr * parentChildPtr = nullptr; GenTreePtr parent; - for (parent = gtNext; parent != NULL; parent = parent->gtNext) + for (parent = gtNext; parent != nullptr; parent = parent->gtNext) { parentChildPtr = gtGetChildPointer(parent); - if (parentChildPtr != NULL) break; + if (parentChildPtr != nullptr) break; } - noway_assert(parent != NULL && parentChildPtr != NULL); if (parentChildPtrPtr != nullptr) *parentChildPtrPtr = parentChildPtr; return parent; } -#endif // !LEGACY_BACKEND /***************************************************************************** * @@ -4960,6 +5091,7 @@ GenTreeCall* Compiler::gtNewCallNode(gtCallTypes callType, node->gtRetClsHnd = nullptr; node->gtControlExpr = nullptr; node->gtCallMoreFlags = 0; + if (callType == CT_INDIRECT) { node->gtCallCookie = NULL; @@ -5013,6 +5145,12 @@ GenTreePtr Compiler::gtNewLclvNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs) { + // We need to ensure that all struct values are normalized. + // It might be nice to assert this in general, but we have assignments of int to long. + if (varTypeIsStruct(type)) + { + assert(type == lvaTable[lnum].lvType); + } GenTreePtr node = new(this, GT_LCL_VAR) GenTreeLclVar(type, lnum, ILoffs ); @@ -5028,6 +5166,12 @@ GenTreePtr Compiler::gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs) { + // We need to ensure that all struct values are normalized. + // It might be nice to assert this in general, but we have assignments of int to long. 
+ if (varTypeIsStruct(type)) + { + assert(type == lvaTable[lnum].lvType); + } #if SMALL_TREE_NODES /* This local variable node may later get transformed into a large node */ @@ -5067,23 +5211,12 @@ GenTreePtr Compiler::gtNewInlineCandidateReturnExpr(GenTreePtr inline GenTreePtr node = new(this, GT_RET_EXPR) GenTreeRetExpr(type); node->gtRetExpr.gtInlineCandidate = inlineCandidate; -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (inlineCandidate->gtType == TYP_STRUCT) { - if (inlineCandidate->gtOper == GT_CALL) - { - node->gtRetExpr.gtRetClsHnd = inlineCandidate->gtCall.gtRetClsHnd; - } - else if (inlineCandidate->gtOper == GT_RET_EXPR) - { - node->gtRetExpr.gtRetClsHnd = inlineCandidate->gtRetExpr.gtRetClsHnd; - } - else - { - unreached(); - } + node->gtRetExpr.gtRetClsHnd = gtGetStructHandle(inlineCandidate); } -#endif + // GT_RET_EXPR node eventually might be bashed back to GT_CALL (when inlining is aborted for example). // Therefore it should carry the GTF_CALL flag so that all the rules about spilling can apply to it as well. // For example, impImportLeave or CEE_POP need to spill GT_RET_EXPR before empty the evaluation stack. @@ -5270,14 +5403,14 @@ GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src DEB /* Mark the target as being assigned */ - if (dst->gtOper == GT_LCL_VAR) + if ((dst->gtOper == GT_LCL_VAR) || (dst->OperGet() == GT_LCL_FLD)) { dst->gtFlags |= GTF_VAR_DEF; - } - else if (dst->OperGet() == GT_LCL_FLD) - { - // We treat these partial writes as combined uses and defs. - dst->gtFlags |= (GTF_VAR_USEASG | GTF_VAR_DEF); + if (dst->IsPartialLclFld(this)) + { + // We treat these partial writes as combined uses and defs. + dst->gtFlags |= GTF_VAR_USEASG; + } } dst->gtFlags |= GTF_DONT_CSE; @@ -5292,6 +5425,14 @@ GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src DEB return asg; } +// Creates a new LdObj node. +GenTreeLdObj* Compiler::gtNewLdObjNode(CORINFO_CLASS_HANDLE structHnd, GenTree* addr) +{ + var_types nodeType = impNormStructType(structHnd); + assert(varTypeIsStruct(nodeType)); + return new (this, GT_LDOBJ) GenTreeLdObj(nodeType, addr, structHnd); +} + // Creates a new CpObj node. 
// Parameters (exactly the same as MSIL CpObj): // @@ -5309,6 +5450,7 @@ GenTreeBlkOp* Compiler::gtNewCpObjNode(GenTreePtr dst, unsigned slots = 0; unsigned gcPtrCount = 0; BYTE * gcPtrs = nullptr; + var_types type = TYP_STRUCT; GenTreePtr hndOrSize = nullptr; @@ -5323,7 +5465,23 @@ GenTreeBlkOp* Compiler::gtNewCpObjNode(GenTreePtr dst, { slots = (unsigned)(roundUp(size, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE); gcPtrs = new (this, CMK_ASTNode) BYTE[slots]; - gcPtrCount = info.compCompHnd->getClassGClayout(structHnd, gcPtrs); + + type = impNormStructType(structHnd, gcPtrs, &gcPtrCount); + if (varTypeIsEnregisterableStruct(type)) + { + if (dst->OperGet() == GT_ADDR) + { + GenTree* actualDst = dst->gtGetOp1(); + assert((actualDst->TypeGet() == type) || !varTypeIsEnregisterableStruct(actualDst)); + actualDst->gtType = type; + } + if (src->OperGet() == GT_ADDR) + { + GenTree* actualSrc = src->gtGetOp1(); + assert((actualSrc->TypeGet() == type) || !varTypeIsEnregisterableStruct(actualSrc)); + actualSrc->gtType = type; + } + } if (gcPtrCount > 0) { @@ -5508,9 +5666,9 @@ void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dstChild = dst->gtGetOp1(); if (dstChild->OperIsLocal() && - dstChild->gtType == TYP_STRUCT && + varTypeIsStruct(dstChild) && srcChild->OperGet() == GT_SIMD && - isSIMDType(srcChild)) + varTypeIsSIMD(srcChild)) { unsigned lclNum = dst->gtGetOp1()->AsLclVarCommon()->GetLclNum(); LclVarDsc* lclVarDsc = &lvaTable[lclNum]; @@ -5608,6 +5766,7 @@ GenTreePtr Compiler::gtClone(GenTree * tree, bool complexOK) tree->gtField.gtFldHnd, objp, tree->gtField.gtFldOffset); + copy->gtField.gtFldMayOverlap = tree->gtField.gtFldMayOverlap; } else if (tree->gtOper == GT_ADD) { @@ -5897,8 +6056,9 @@ GenTreePtr Compiler::gtCloneExpr(GenTree * tree, copy = new (this, GT_BOX) GenTreeBox(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBox.gtAsgStmtWhenInlinedBoxValue); break; - case GT_MATH: - copy = new (this, GT_MATH) GenTreeMath(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2, tree->gtMath.gtMathFN); + case GT_INTRINSIC: + copy = new (this, GT_INTRINSIC) GenTreeIntrinsic(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2, tree->gtIntrinsic.gtIntrinsicId, + tree->gtIntrinsic.gtMethodHandle); break; case GT_COPYOBJ: @@ -6138,7 +6298,7 @@ GenTreePtr Compiler::gtCloneExpr(GenTree * tree, tree->gtField.gtFldOffset); copy->gtField.gtFldObj = tree->gtField.gtFldObj ? gtCloneExpr(tree->gtField.gtFldObj , addFlags, varNum, varVal) : 0; - + copy->gtField.gtFldMayOverlap = tree->gtField.gtFldMayOverlap; #ifdef FEATURE_READYTORUN_COMPILER copy->gtField.gtFieldLookup = tree->gtField.gtFieldLookup; #endif @@ -6253,6 +6413,169 @@ DONE: return copy; } +//------------------------------------------------------------------------ +// gtReplaceTree: Replace a tree with a new tree. +// +// Arguments: +// stmt - The top-level root stmt of the tree bing replaced. +// Must not be null. +// tree - The tree being replaced. Must not be null. +// replacementTree - The replacement tree. Must not be null. +// +// Return Value: +// Return the tree node actually replaces the old tree. +// +// Assumptions: +// The sequencing of the stmt has been done. +// +// Notes: +// The caller must ensure that the original statement has been sequenced, +// but this method will sequence 'replacementTree', and insert it into the +// proper place in the statement sequence. 
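Aside (editor's illustration, not part of the commit): the re-threading that gtReplaceTree performs on a statement's linear node order is, at its core, a doubly-linked-list splice -- the segment of nodes belonging to the old subtree is unhooked and the segment for the replacement subtree is hooked in between the same neighbours. A minimal sketch of that splice, with a hypothetical Node standing in for GenTree and the arg-table and side-effect fixups of the real routine omitted:

    #include <cassert>

    struct Node { Node* prev = nullptr; Node* next = nullptr; };

    // Replace the segment [oldFirst, oldLast] with [newFirst, newLast], keeping
    // whatever preceded and followed the old segment attached.
    void SpliceSegment(Node* oldFirst, Node* oldLast, Node* newFirst, Node* newLast)
    {
        assert(oldFirst && oldLast && newFirst && newLast);
        Node* before = oldFirst->prev;   // null when the old segment started the list
        Node* after  = oldLast->next;    // null when the old segment ended the list

        newFirst->prev = before;
        if (before != nullptr) { before->next = newFirst; }

        newLast->next = after;
        if (after != nullptr)  { after->prev = newLast; }
    }

The two null checks correspond to the special cases the routine below handles explicitly: a replacement at the head of the statement (where gtStmtList must be updated) and one at the tail.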
+ +GenTreePtr Compiler::gtReplaceTree(GenTreePtr stmt, + GenTreePtr tree, + GenTreePtr replacementTree) +{ + assert(fgStmtListThreaded); + assert(tree != nullptr); + assert(stmt != nullptr); + assert(replacementTree != nullptr); + + GenTreePtr* treePtr = nullptr; + GenTreePtr treeParent = tree->gtGetParent(&treePtr); + + assert(treeParent != nullptr || tree == stmt->gtStmt.gtStmtExpr); + + if (treePtr == nullptr) + { + // Replace the stmt expr and rebuild the linear order for "stmt". + assert(treeParent == nullptr); + assert(fgOrder != FGOrderLinear); + stmt->gtStmt.gtStmtExpr = tree; + fgSetStmtSeq(stmt); + } + else + { + assert(treeParent != nullptr); + + GenTreePtr treeFirstNode = fgGetFirstNode(tree); + GenTreePtr treeLastNode = tree; + GenTreePtr treePrevNode = treeFirstNode->gtPrev; + GenTreePtr treeNextNode = treeLastNode->gtNext; + + *treePtr = replacementTree; + + // Build the linear order for "replacementTree". + fgSetTreeSeq(replacementTree, treePrevNode); + + // Restore linear-order Prev and Next for "replacementTree". + if (treePrevNode != nullptr) + { + treeFirstNode = fgGetFirstNode(replacementTree); + treeFirstNode->gtPrev = treePrevNode; + treePrevNode->gtNext = treeFirstNode; + } + else + { + // Update the linear oder start of "stmt" if treeFirstNode + // appears to have replaced the original first node. + assert(treeFirstNode == stmt->gtStmt.gtStmtList); + stmt->gtStmt.gtStmtList = fgGetFirstNode(replacementTree); + } + + if (treeNextNode != nullptr) + { + treeLastNode = replacementTree; + treeLastNode->gtNext = treeNextNode; + treeNextNode->gtPrev = treeLastNode; + } + + bool needFixupCallArg = false; + GenTreePtr node = treeParent; + + // If we have replaced an arg, then update pointers in argtable. + do + { + // Look for the first enclosing callsite + switch (node->OperGet()) + { + case GT_LIST: + case GT_ARGPLACE: + // "tree" is likely an argument of a call. + needFixupCallArg = true; + break; + + case GT_CALL: + if (needFixupCallArg) + { + // We have replaced an arg, so update pointers in argtable. + fgFixupArgTabEntryPtr(node, tree, replacementTree); + needFixupCallArg = false; + } + break; + + default: + // "tree" is unlikely an argument of a call. + needFixupCallArg = false; + break; + } + + if (needFixupCallArg) + { + // Keep tracking to update the first enclosing call. + node = node->gtGetParent(nullptr); + } + else + { + // Stop tracking. + node = nullptr; + } + } while (node != nullptr); + + // Propagate side-effect flags of "replacementTree" to its parents if needed. + gtUpdateSideEffects(treeParent, tree->gtFlags, replacementTree->gtFlags); + } + + return replacementTree; +} + +//------------------------------------------------------------------------ +// gtUpdateSideEffects: Update the side effects for ancestors. +// +// Arguments: +// treeParent - The immediate parent node. +// oldGtFlags - The stale gtFlags. +// newGtFlags - The new gtFlags. +// +// +// Assumptions: +// Linear order of the stmt has been established. +// +// Notes: +// The routine is used for updating the stale side effect flags for ancestor +// nodes starting from treeParent up to the top-level stmt expr. 
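Aside (editor's illustration, not part of the commit): the propagation rule documented above is small enough to show in isolation -- mask both flag sets down to the effect bits, and if they differ, rewrite every ancestor up the parent chain. A toy sketch, with Node and ALL_EFFECT as hypothetical stand-ins for GenTree and GTF_ALL_EFFECT:

    struct Node
    {
        unsigned flags  = 0;
        Node*    parent = nullptr;   // stand-in for gtGetParent(nullptr)
    };

    const unsigned ALL_EFFECT = 0xFFu;   // placeholder mask for the real GTF_ALL_EFFECT

    void UpdateSideEffects(Node* parent, unsigned oldFlags, unsigned newFlags)
    {
        oldFlags &= ALL_EFFECT;
        newFlags &= ALL_EFFECT;
        if (oldFlags == newFlags)
            return;                      // effects unchanged; ancestors already agree

        for (Node* n = parent; n != nullptr; n = n->parent)
        {
            n->flags &= ~oldFlags;       // drop the effect bits the old subtree contributed
            n->flags |= newFlags;        // add the effect bits of the replacement subtree
        }
    }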
+ +void Compiler::gtUpdateSideEffects(GenTreePtr treeParent, + unsigned oldGtFlags, + unsigned newGtFlags) +{ + assert(fgStmtListThreaded); + + oldGtFlags = oldGtFlags & GTF_ALL_EFFECT; + newGtFlags = newGtFlags & GTF_ALL_EFFECT; + + if (oldGtFlags != newGtFlags) + { + while (treeParent) + { + treeParent->gtFlags &= ~oldGtFlags; + treeParent->gtFlags |= newGtFlags; + treeParent = treeParent->gtGetParent(nullptr); + } + } +} + /***************************************************************************** * * Comapres two trees and returns true when both trees are the same. @@ -7112,8 +7435,15 @@ void Compiler::gtDispNode(GenTreePtr tree, { printf("L"); --msgLength; break; } goto DASH; + case GT_ADDR: + if (tree->gtFlags & GTF_ADDR_ONSTACK) + { printf("L"); --msgLength; break; } // L means LclVar + goto DASH; + case GT_LCL_FLD: case GT_LCL_VAR: + case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: case GT_REG_VAR: @@ -7302,11 +7632,14 @@ void Compiler::gtDispRegVal(GenTree * tree) printf(" REG %s", compRegVarName(tree->gtRegNum)); break; +#if CPU_LONG_USES_REGPAIR case GenTree::GT_REGTAG_REGPAIR: printf(" PAIR %s", compRegPairName(tree->gtRegPair)); break; +#endif - default: break; + default: + break; } if (tree->gtFlags & GTF_REG_VAL) @@ -7320,22 +7653,8 @@ void Compiler::gtDispRegVal(GenTree * tree) #define LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH (sizeof(LONGEST_COMMON_LCL_VAR_DISPLAY)) #define BUF_SIZE (LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH*2) -/*****************************************************************************/ -int Compiler::gtGetLclVarName(unsigned lclNum, char* buf, unsigned buf_remaining) +void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, const char** ilNameOut, unsigned * ilNumOut) { - char* bufp_next = buf; - unsigned charsPrinted = 0; - int sprintf_result; - - sprintf_result = sprintf_s(bufp_next, buf_remaining, "V%02u", lclNum); - - if (sprintf_result < 0) - return sprintf_result; - - charsPrinted += sprintf_result; - bufp_next += sprintf_result; - buf_remaining -= sprintf_result; - const char* ilKind = nullptr; const char* ilName = nullptr; @@ -7437,6 +7756,33 @@ int Compiler::gtGetLclVarName(unsigned lclNum, char* buf, unsign } } + *ilKindOut = ilKind; + *ilNameOut = ilName; + *ilNumOut = ilNum; +} + +/*****************************************************************************/ +int Compiler::gtGetLclVarName(unsigned lclNum, char* buf, unsigned buf_remaining) +{ + char* bufp_next = buf; + unsigned charsPrinted = 0; + int sprintf_result; + + sprintf_result = sprintf_s(bufp_next, buf_remaining, "V%02u", lclNum); + + if (sprintf_result < 0) + return sprintf_result; + + charsPrinted += sprintf_result; + bufp_next += sprintf_result; + buf_remaining -= sprintf_result; + + const char* ilKind = nullptr; + const char* ilName = nullptr; + unsigned ilNum = 0; + + Compiler::gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum); + if (ilName != nullptr) { sprintf_result = sprintf_s(bufp_next, buf_remaining, " %s", ilName); @@ -7772,9 +8118,11 @@ Compiler::gtDispLeaf(GenTree *tree, IndentStack* indentStack) #ifndef LEGACY_BACKEND else if (tree->InReg()) { +#if CPU_LONG_USES_REGPAIR if (isRegPairType(tree->TypeGet())) printf(" %s", compRegPairName(tree->gtRegPair)); else +#endif printf(" %s", compRegVarName(tree->gtRegNum)); } #endif // !LEGACY_BACKEND @@ -8115,15 +8463,30 @@ void Compiler::gtDispTree(GenTreePtr tree, } } - if (tree->gtOper == GT_MATH) + if (tree->gtOper == GT_INTRINSIC) { - switch 
(tree->gtMath.gtMathFN) + switch (tree->gtIntrinsic.gtIntrinsicId) { - case CORINFO_INTRINSIC_Sin: printf(" sin"); break; - case CORINFO_INTRINSIC_Cos: printf(" cos"); break; - case CORINFO_INTRINSIC_Sqrt: printf(" sqrt"); break; - case CORINFO_INTRINSIC_Abs: printf(" abs"); break; - case CORINFO_INTRINSIC_Round: printf(" round"); break; + case CORINFO_INTRINSIC_Sin: printf(" sin"); break; + case CORINFO_INTRINSIC_Cos: printf(" cos"); break; + case CORINFO_INTRINSIC_Sqrt: printf(" sqrt"); break; + case CORINFO_INTRINSIC_Abs: printf(" abs"); break; + case CORINFO_INTRINSIC_Round: printf(" round"); break; + case CORINFO_INTRINSIC_Cosh: printf(" cosh"); break; + case CORINFO_INTRINSIC_Sinh: printf(" sinh"); break; + case CORINFO_INTRINSIC_Tan: printf(" tan"); break; + case CORINFO_INTRINSIC_Tanh: printf(" tanh"); break; + case CORINFO_INTRINSIC_Asin: printf(" asin"); break; + case CORINFO_INTRINSIC_Acos: printf(" acos"); break; + case CORINFO_INTRINSIC_Atan: printf(" atan"); break; + case CORINFO_INTRINSIC_Atan2: printf(" atan2"); break; + case CORINFO_INTRINSIC_Log10: printf(" log10"); break; + case CORINFO_INTRINSIC_Pow: printf(" pow"); break; + case CORINFO_INTRINSIC_Exp: printf(" exp"); break; + case CORINFO_INTRINSIC_Ceiling: printf(" ceiling"); break; + case CORINFO_INTRINSIC_Floor: printf(" floor"); break; + case CORINFO_INTRINSIC_Object_GetType: printf(" objGetType"); break; + default: unreached(); } @@ -10608,7 +10971,9 @@ GenTreePtr Compiler::gtNewTempAssign(unsigned tmp, GenTreePtr val) GenTreePtr dest = gtNewLclvNode(tmp, dstTyp); dest->gtFlags |= GTF_VAR_DEF; - if (valTyp == TYP_STRUCT) + // With first-class structs, we should be propagating the class handle on all non-primitive + // struct types. But we don't have a convenient way to do that for all SIMD temps. + if (varTypeIsStruct(valTyp) && (gtGetStructHandleIfPresent(val) != NO_CLASS_HANDLE)) { /* The GT_LDOBJ may be be a child of a GT_COMMA */ GenTreePtr valx = val->gtEffectiveVal(/*commaOnly*/true); @@ -10685,7 +11050,7 @@ GenTreePtr Compiler::gtNewRefCOMfield(GenTreePtr objPtr, // The calling convention for the helper does not take into // account optimization of primitive structs. 
- if ((pFieldInfo->helper == CORINFO_HELP_GETFIELDSTRUCT) && (lclTyp != TYP_STRUCT)) + if ((pFieldInfo->helper == CORINFO_HELP_GETFIELDSTRUCT) && !varTypeIsStruct(lclTyp)) { helperType = TYP_STRUCT; } @@ -10723,7 +11088,7 @@ GenTreePtr Compiler::gtNewRefCOMfield(GenTreePtr objPtr, // the first parameter is really an out parameter) tree->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG; - if (lclTyp != TYP_STRUCT) + if (!varTypeIsStruct(lclTyp)) { // get the result as primitive type tree = impGetStructAddr(tree, structType, (unsigned)CHECK_SPILL_ALL, true); @@ -10743,7 +11108,7 @@ GenTreePtr Compiler::gtNewRefCOMfield(GenTreePtr objPtr, // OK, now do the indirection if (access & CORINFO_ACCESS_GET) { - if (lclTyp == TYP_STRUCT) + if (varTypeIsStruct(lclTyp)) { tree = new (this, GT_LDOBJ) GenTreeLdObj(lclTyp, tree, structType); tree->gtLdObj.gtFldTreeList = NULL; @@ -10756,7 +11121,7 @@ GenTreePtr Compiler::gtNewRefCOMfield(GenTreePtr objPtr, } else if (access & CORINFO_ACCESS_SET) { - if (lclTyp == TYP_STRUCT) + if (varTypeIsStruct(lclTyp)) tree = impAssignStructPtr(tree, assg, structType, (unsigned)CHECK_SPILL_ALL); else { @@ -10912,6 +11277,19 @@ bool Compiler::gtTreeHasSideEffects(GenTreePtr tree, return false; } } + else if (tree->OperGet() == GT_INTRINSIC) + { + if (gtNodeHasSideEffects(tree, flags)) + return true; + + if (gtNodeHasSideEffects(tree->gtOp.gtOp1, flags)) + return true; + + if ((tree->gtOp.gtOp2 != nullptr) && gtNodeHasSideEffects(tree->gtOp.gtOp2, flags)) + return true; + + return false; + } } return true; @@ -11365,6 +11743,11 @@ bool Compiler::gtCanOptimizeTypeEquality(GenTreePtr tree) } } } + else if ((tree->gtOper == GT_INTRINSIC) && + (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) + { + return true; + } else if (tree->gtOper == GT_LCL_VAR) { LclVarDsc * lcl = &(lvaTable[tree->gtLclVarCommon.gtLclNum]); @@ -11795,16 +12178,32 @@ bool GenTree::IsPhiDefnStmt() return asg->IsPhiDefn(); } +// IsPartialLclFld: Check for a GT_LCL_FLD whose type is a different size than the lclVar. +// +// Arguments: +// comp - the Compiler object. +// +// Return Value: +// Returns "true" iff 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type +// is not the same size as the type of the GT_LCL_VAR + +bool GenTree::IsPartialLclFld(Compiler* comp) +{ + return ((gtOper == GT_LCL_FLD) && + (comp->lvaTable[this->gtLclVarCommon.gtLclNum].lvExactSize != genTypeSize(gtType))); +} + bool GenTree::DefinesLocal(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire) { if (OperIsAssignment()) { if (gtOp.gtOp1->IsLocal()) { - *pLclVarTree = gtOp.gtOp1->AsLclVarCommon(); - if (pIsEntire != NULL) + GenTreeLclVarCommon* lclVarTree = gtOp.gtOp1->AsLclVarCommon(); + *pLclVarTree = lclVarTree; + if (pIsEntire != nullptr) { - if (gtOp.gtOp1->OperGet() == GT_LCL_FLD) + if (lclVarTree->IsPartialLclFld(comp)) { *pIsEntire = false; } @@ -12175,19 +12574,104 @@ size_t GenTreeIndir::Offset() } #ifdef _TARGET_AMD64_ -bool GenTreeIntConCommon::FitsInAddrBase(Compiler *comp) +// Returns true if this absolute address fits within the base of an addr mode. +// On Amd64 this effectively means, whether an absolute indirect address can +// be encoded as 32-bit offset relative to IP or zero. 
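Aside (editor's illustration, not part of the commit): the non-relocatable AMD64 path described above hinges on whether an absolute address survives a round trip through a signed 32-bit displacement; the change expresses this as (int)val == val in FitsInI32. An equivalent self-contained form of that test (the range check behaves the same on the two's-complement targets the JIT supports):

    #include <cstdint>

    // True when 'val' can be encoded as a sign-extended 32-bit displacement,
    // i.e. as an offset relative to zero rather than relative to RIP.
    static bool FitsInSignedInt32(int64_t val)
    {
        return val >= INT32_MIN && val <= INT32_MAX;
    }

    // Example: FitsInSignedInt32(0x7FFFFFFF) is true, FitsInSignedInt32(0x100000000)
    // is false -- the latter would need the RIP-relative form (if the VM's reloc
    // hint allows it) or a 64-bit immediate loaded into a register first.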
+bool GenTreeIntConCommon::FitsInAddrBase(Compiler* comp) { - return IsIntCnsFitsInI32() || - (comp->opts.compReloc && - IsIconHandle() && - (IMAGE_REL_BASED_REL32 == comp->info.compCompHnd->getRelocTypeHint((void *)IconValue()))); +#ifndef LEGACY_BACKEND +#ifdef DEBUG + // Early out if PC-rel encoding of absolute addr is disabled. + if (!comp->opts.compEnablePCRelAddr) + { + return false; + } +#endif +#endif //!LEGACY_BACKEND + + if (comp->opts.compReloc) + { + // During Ngen JIT is always asked to generate relocatable code. + // Hence JIT will try to encode only icon handles as pc-relative offsets. + return IsIconHandle() && (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void *)IconValue())); + } + else + { + // During Jitting, we are allowed to generate non-relocatable code. + // On Amd64 we can encode an absolute indirect addr as an offset relative to zero or RIP. + // An absolute indir addr that can fit within 32-bits can ben encoded as an offset relative + // to zero. All other absolute indir addr could be attempted to be encoded as RIP relative + // based on reloc hint provided by VM. RIP relative encoding is preferred over relative + // to zero, because the former is one byte smaller than the latter. For this reason + // we check for reloc hint first and then whether addr fits in 32-bits next. + // + // VM starts off with an initial state to allow both data and code address to be encoded as + // pc-relative offsets. Hence JIT will attempt to encode all absolute addresses as pc-relative + // offsets. It is possible while jitting a method, an address could not be encoded as a + // pc-relative offset. In that case VM will note the overflow and will trigger re-jitting + // of the method with reloc hints turned off for all future methods. Second time around + // jitting will succeed since JIT will not attempt to encode data addresses as pc-relative + // offsets. Note that JIT will always attempt to relocate code addresses (.e.g call addr). + // After an overflow, VM will assume any relocation recorded is for a code address and will + // emit jump thunk if it cannot be encoded as pc-relative offset. + return (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void *)IconValue())) || FitsInI32(); + } } -#elif defined(_TARGET_X86_) -bool GenTreeIntConCommon::FitsInAddrBase(Compiler *comp) + +// Returns true if this icon value is encoded as immediate value needs recording a relocation with VM +bool GenTreeIntConCommon::ImmedValNeedsReloc(Compiler* comp) +{ + return comp->opts.compReloc && IsIconHandle(); +} + +// Returns true if this icon value is encoded as addr needs recording a relocation with VM +bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp) { - return IsCnsIntOrI(); + if (comp->opts.compReloc) + { + // During Ngen JIT is always asked to generate relocatable code. + // Hence JIT will try to encode only icon handles as pc-relative offsets. + return IsIconHandle() && (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void *)IconValue())); + } + else + { + return IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void *)IconValue()); + } } + +#elif defined(_TARGET_X86_) +// Returns true if this absolute address fits within the base of an addr mode. +// On x86 all addresses are 4-bytes and can be directly encoded in an addr mode. +bool GenTreeIntConCommon::FitsInAddrBase(Compiler* comp) +{ +#ifndef LEGACY_BACKEND +#ifdef DEBUG + // Early out if PC-rel encoding of absolute addr is disabled. 
+ if (!comp->opts.compEnablePCRelAddr) + { + return false; + } #endif +#endif //!LEGACY_BACKEND + + //TODO-x86 - TLS field handles are excluded for now as they are accessed relative to FS segment. + //Handling of TLS field handles is a NYI and this needs to be relooked after implementing it. + return IsCnsIntOrI() && !IsIconHandle(GTF_ICON_TLS_HDL); +} + +// Returns true if this icon value is encoded as immediate value needs recording a relocation with VM +bool GenTreeIntConCommon::ImmedValNeedsReloc(Compiler* comp) +{ + return comp->opts.compReloc && IsIconHandle(); +} + +// Returns true if this icon value is encoded as addr needs recording a relocation with VM +bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp) +{ + //If generating relocatable code, icons should be reported for recording relocatons. + return comp->opts.compReloc && IsIconHandle(); +} +#endif //_TARGET_X86_ bool GenTree::IsFieldAddr(Compiler* comp, GenTreePtr* pObj, GenTreePtr* pStatic, FieldSeqNode** pFldSeq) { @@ -12371,6 +12855,49 @@ bool Compiler::gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_ return fieldTyp != TYP_REF; } +CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) +{ + CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE; + tree = tree->gtEffectiveVal(); + if (varTypeIsStruct(tree->gtType)) + { + switch(tree->gtOper) + { + default: + break; + case GT_LDOBJ: structHnd = tree->gtLdObj.gtClass; break; + case GT_CALL: structHnd = tree->gtCall.gtRetClsHnd; break; + case GT_RET_EXPR: structHnd = tree->gtRetExpr.gtRetClsHnd; break; + case GT_ARGPLACE: structHnd = tree->gtArgPlace.gtArgPlaceClsHnd; break; + case GT_INDEX: structHnd = tree->gtIndex.gtStructElemClass; break; + case GT_FIELD: info.compCompHnd->getFieldType(tree->gtField.gtFldHnd, &structHnd); break; + case GT_ASG: + structHnd = gtGetStructHandle(tree->gtGetOp1()); + break; + case GT_LCL_VAR: + structHnd = lvaTable[tree->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle(); + assert(structHnd != NO_CLASS_HANDLE); + break; + case GT_IND: + if (tree->gtFlags & GTF_IND_ARR_INDEX) + { + ArrayInfo arrInfo; + bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo); + assert(b); + structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); + } + } + } + return structHnd; +} + +CORINFO_CLASS_HANDLE Compiler::gtGetStructHandle(GenTree* tree) +{ + CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(tree); + assert(structHnd != NO_CLASS_HANDLE); + return structHnd; +} + void GenTree::ParseArrayAddress(Compiler* comp, ArrayInfo* arrayInfo, GenTreePtr* pArr, ValueNum* pInxVN, FieldSeqNode** pFldSeq) { *pArr = nullptr; @@ -12428,18 +12955,65 @@ void GenTree::ParseArrayAddress(Compiler* comp, ArrayInfo* arrayInfo, GenTreePtr assert((constIndOffset % elemSize) == 0); ssize_t constInd = constIndOffset / elemSize; + ValueNumStore* vnStore = comp->GetValueNumStore(); + if (inxVN == ValueNumStore::NoVN) { // Must be a constant index. - *pInxVN = comp->GetValueNumStore()->VNForPtrSizeIntCon(constInd); + *pInxVN = vnStore->VNForPtrSizeIntCon(constInd); } else { - *pInxVN = inxVN; - if (constInd != 0) + // + // Perform ((inxVN / elemSizeVN) + vnForConstInd) + // + + // The value associated with the index value number (inxVN) is the offset into the array, + // which has been scaled by element size. 
We need to recover the array index from that offset + if (vnStore->IsVNConstant(inxVN)) + { + ssize_t index = vnStore->CoercedConstantValue<ssize_t>(inxVN); + noway_assert(elemSize > 0 && ((index % elemSize) == 0)); + *pInxVN = vnStore->VNForPtrSizeIntCon((index / elemSize) + constInd); + } + else { - ValueNum vnForConstInd = comp->GetValueNumStore()->VNForPtrSizeIntCon(constInd); - *pInxVN = comp->GetValueNumStore()->VNForFunc(TYP_I_IMPL, GetVNFuncForOper(GT_ADD, (gtFlags & GTF_UNSIGNED) != 0), *pInxVN, vnForConstInd); + bool canFoldDiv = false; + + // If the index VN is a MUL by elemSize, see if we can eliminate it instead of adding + // the division by elemSize. + VNFuncApp funcApp; + if (vnStore->GetVNFunc(inxVN, &funcApp) && funcApp.m_func == (VNFunc) GT_MUL) + { + ValueNum vnForElemSize = vnStore->VNForLongCon(elemSize); + + // One of the multiply operand is elemSize, so the resulting + // index VN should simply be the other operand. + if (funcApp.m_args[1] == vnForElemSize) + { + *pInxVN = funcApp.m_args[0]; + canFoldDiv = true; + } + else if (funcApp.m_args[0] == vnForElemSize) + { + *pInxVN = funcApp.m_args[1]; + canFoldDiv = true; + } + } + + // Perform ((inxVN / elemSizeVN) + vnForConstInd) + if (!canFoldDiv) + { + ValueNum vnForElemSize = vnStore->VNForPtrSizeIntCon(elemSize); + ValueNum vnForScaledInx = vnStore->VNForFunc(TYP_I_IMPL, GetVNFuncForOper(GT_DIV, false), inxVN, vnForElemSize); + *pInxVN = vnForScaledInx; + } + + if (constInd != 0) + { + ValueNum vnForConstInd = comp->GetValueNumStore()->VNForPtrSizeIntCon(constInd); + *pInxVN = comp->GetValueNumStore()->VNForFunc(TYP_I_IMPL, GetVNFuncForOper(GT_ADD, (gtFlags & GTF_UNSIGNED) != 0), *pInxVN, vnForConstInd); + } } } } diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 331f87ae80..629f1fb489 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -68,7 +68,7 @@ enum SpecialCodeKind DECLARE_TYPED_ENUM(genTreeOps,BYTE) { - #define GTNODE(en,sn,cm,ok) en, + #define GTNODE(en,sn,cm,ok) GT_ ## en, #include "gtlist.h" GT_COUNT, @@ -348,14 +348,14 @@ struct GenTree #endif // FEATURE_ANYCSE #if ASSERTION_PROP - unsigned char gtAssertionNum; // 0 or Assertion table index - // valid only for non-GT_STMT nodes + unsigned short gtAssertionNum; // 0 or Assertion table index + // valid only for non-GT_STMT nodes bool HasAssertion() const { return gtAssertionNum != 0; } void ClearAssertion() { gtAssertionNum = 0; } - unsigned int GetAssertion() const { return gtAssertionNum; } - void SetAssertion(unsigned int value) { assert((unsigned char)value == value); gtAssertionNum = (unsigned char)value; } + unsigned short GetAssertion() const { return gtAssertionNum; } + void SetAssertion(unsigned short value) { assert((unsigned short)value == value); gtAssertionNum = (unsigned short)value; } #endif @@ -443,13 +443,17 @@ public: { GT_REGTAG_NONE, // Nothing has been assigned to _gtRegNum/_gtRegPair GT_REGTAG_REG, // _gtRegNum has been assigned +#if CPU_LONG_USES_REGPAIR GT_REGTAG_REGPAIR // _gtRegPair has been assigned +#endif }; genRegTag GetRegTag() const { - assert(gtRegTag == GT_REGTAG_NONE || - gtRegTag == GT_REGTAG_REG || - gtRegTag == GT_REGTAG_REGPAIR); +#if CPU_LONG_USES_REGPAIR + assert(gtRegTag == GT_REGTAG_NONE || gtRegTag == GT_REGTAG_REG || gtRegTag == GT_REGTAG_REGPAIR); +#else + assert(gtRegTag == GT_REGTAG_NONE || gtRegTag == GT_REGTAG_REG); +#endif return gtRegTag; } private: @@ -520,6 +524,7 @@ public: assert(_gtRegNum == reg); } +#if CPU_LONG_USES_REGPAIR __declspec(property(get=GetRegPair,put=SetRegPair)) 
regPairNo gtRegPair; @@ -544,6 +549,7 @@ public: INDEBUG(gtRegTag = GT_REGTAG_REGPAIR;) assert(_gtRegPair == regPair); } +#endif // Copy the _gtRegNum/_gtRegPair/gtRegTag fields void CopyReg(GenTreePtr from) @@ -563,6 +569,7 @@ public: // // In order for this to work properly, gtClearReg (above) must be called prior to setting // the register value. +#if CPU_LONG_USES_REGPAIR if (isRegPairType(TypeGet())) { assert(_gtRegNum != REG_NA); @@ -570,6 +577,7 @@ public: return gtRegPair != REG_PAIR_NONE; } else +#endif { assert(_gtRegNum != REG_PAIR_NONE); INDEBUG(assert(gtRegTag == GT_REGTAG_REG)); @@ -579,11 +587,13 @@ public: regMaskTP gtGetRegMask() const { +#if CPU_LONG_USES_REGPAIR if (isRegPairType(TypeGet())) { return genRegPairMask(gtRegPair); } else +#endif { return genRegMask(gtRegNum); } @@ -819,7 +829,7 @@ public: #define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS of an assignment; don't evaluate it independently. - #define GTF_ADDR_ONSTACK 0x80000000 // GT_ADDR -- this expression is guarenteed to be on the stack + #define GTF_ADDR_ONSTACK 0x80000000 // GT_ADDR -- this expression is guaranteed to be on the stack #define GTF_ADDRMODE_NO_CSE 0x80000000 // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex addressing mode @@ -1254,6 +1264,11 @@ public: #endif // !FEATURE_SIMD } + bool OperIsSIMD() + { + return OperIsSIMD(gtOper); + } + // Requires that "op" is an op= operator. Returns // the corresponding "op". static @@ -1284,7 +1299,7 @@ public: switch (gtOper) { case GT_LIST: - case GT_MATH: + case GT_INTRINSIC: case GT_LEA: case GT_STOREIND: case GT_INITBLK: @@ -1383,6 +1398,9 @@ public: static const char * NodeName(genTreeOps op); + static + const char * OpName(genTreeOps op); + //--------------------------------------------------------------------- #endif //--------------------------------------------------------------------- @@ -1402,6 +1420,10 @@ public: return OperIsLocal(OperGet()); } + // Returns "true" iff 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type + // is not the same size as the type of the GT_LCL_VAR. + bool IsPartialLclFld(Compiler* comp); + // Returns "true" iff "this" defines a local variable. Requires "comp" to be the // current compilation. 
If returns "true", sets "*pLclVarTree" to the // tree for the local that is defined, and, if "pIsEntire" is non-null, sets "*pIsEntire" to @@ -1749,8 +1771,24 @@ struct GenTreeIntConCommon: public GenTree DEBUG_ARG(largeNode)) {} + bool FitsInI32() + { + return FitsInI32(IconValue()); + } + + static bool FitsInI32(ssize_t val) + { +#ifdef _TARGET_64BIT_ + return (int)val == val; +#else + return true; +#endif + } + #ifdef _TARGET_XARCH_ - bool FitsInAddrBase(Compiler *comp); + bool FitsInAddrBase(Compiler* comp); + bool ImmedValNeedsReloc(Compiler* comp); + bool AddrNeedsReloc(Compiler* comp); #endif #if DEBUGGABLE_GENTREE @@ -2217,6 +2255,7 @@ struct GenTreeField: public GenTree GenTreePtr gtFldObj; CORINFO_FIELD_HANDLE gtFldHnd; DWORD gtFldOffset; + bool gtFldMayOverlap; #ifdef FEATURE_READYTORUN_COMPILER CORINFO_CONST_LOOKUP gtFieldLookup; #endif @@ -2224,7 +2263,9 @@ struct GenTreeField: public GenTree GenTreeField(var_types type) : GenTree(GT_FIELD, type ) - {} + { + gtFldMayOverlap = false; + } #if DEBUGGABLE_GENTREE GenTreeField() : GenTree() {} #endif @@ -2331,9 +2372,10 @@ struct GenTreeCall final : public GenTree #if FEATURE_TAILCALL_OPT #define GTF_CALL_M_IMPLICIT_TAILCALL 0x0400 // GT_CALL -- call is an opportunistic tail call and importer has performed tail call checks +#define GTF_CALL_M_TAILCALL_TO_LOOP 0x0800 // GT_CALL -- call is a fast recursive tail call that can be converted into a loop #endif -#define GTF_CALL_M_PINVOKE 0x0800 // GT_CALL -- call is a pinvoke. This mirrors VM flag CORINFO_FLG_PINVOKE. +#define GTF_CALL_M_PINVOKE 0x1000 // GT_CALL -- call is a pinvoke. This mirrors VM flag CORINFO_FLG_PINVOKE. // A call marked as Pinvoke is not necessarily a GT_CALL_UNMANAGED. For e.g. // an IL Stub dynamically generated for a PInvoke declaration is flagged as // a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to @@ -2411,8 +2453,10 @@ struct GenTreeCall final : public GenTree // Returns true if this is marked for opportunistic tail calling. // That is, can be tail called though not explicitly prefixed with "tail" prefix. bool IsImplicitTailCall() { return (gtCallMoreFlags & GTF_CALL_M_IMPLICIT_TAILCALL) != 0; } + bool IsTailCallConvertibleToLoop() { return (gtCallMoreFlags & GTF_CALL_M_TAILCALL_TO_LOOP) != 0; } #else // !FEATURE_TAILCALL_OPT bool IsImplicitTailCall() { return false; } + bool IsTailCallConvertibleToLoop() { return false; } #endif // !FEATURE_TAILCALL_OPT bool IsSameThis() { return (gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS) != 0; } @@ -2425,7 +2469,7 @@ struct GenTreeCall final : public GenTree unsigned char gtCallType :3; // value from the gtCallTypes enumeration unsigned char gtReturnType :5; // exact return type - CORINFO_CLASS_HANDLE gtRetClsHnd; // The return type handle of the call if it is a struct; used for HFAs. + CORINFO_CLASS_HANDLE gtRetClsHnd; // The return type handle of the call if it is a struct; always available union { @@ -2517,32 +2561,30 @@ struct GenTreeQmark : public GenTreeOp #endif }; +/* gtIntrinsic -- intrinsic (possibly-binary op [NULL op2 is allowed] with an additional field) */ -#if INLINE_MATH - -/* gtMath -- math intrinsic (possibly-binary op [NULL op2 is allowed] with an additional field) */ - -struct GenTreeMath: public GenTreeOp +struct GenTreeIntrinsic: public GenTreeOp { - CorInfoIntrinsics gtMathFN; + CorInfoIntrinsics gtIntrinsicId; + CORINFO_METHOD_HANDLE gtMethodHandle; // Method handle of the method which is treated as an intrinsic. 
- GenTreeMath(var_types type, GenTreePtr op1, CorInfoIntrinsics mathFN) : - GenTreeOp(GT_MATH, type, op1, NULL), - gtMathFN(mathFN) + GenTreeIntrinsic(var_types type, GenTreePtr op1, CorInfoIntrinsics intrinsicId, CORINFO_METHOD_HANDLE methodHandle) : + GenTreeOp(GT_INTRINSIC, type, op1, NULL), + gtIntrinsicId(intrinsicId), + gtMethodHandle(methodHandle) {} - GenTreeMath(var_types type, GenTreePtr op1, GenTreePtr op2, CorInfoIntrinsics mathFN) : - GenTreeOp(GT_MATH, type, op1, op2), - gtMathFN(mathFN) + GenTreeIntrinsic(var_types type, GenTreePtr op1, GenTreePtr op2, CorInfoIntrinsics intrinsicId, CORINFO_METHOD_HANDLE methodHandle) : + GenTreeOp(GT_INTRINSIC, type, op1, op2), + gtIntrinsicId(intrinsicId), + gtMethodHandle(methodHandle) {} #if DEBUGGABLE_GENTREE - GenTreeMath() : GenTreeOp() {} + GenTreeIntrinsic() : GenTreeOp() {} #endif }; -#endif // INLINE_MATH - #ifdef FEATURE_SIMD /* gtSIMD -- SIMD intrinsic (possibly-binary op [NULL op2 is allowed] with additional fields) */ @@ -3017,21 +3059,76 @@ protected: #endif }; -// StoreInd is just a BinOp, no additional data +// Read-modify-write status of a RMW memory op rooted at a storeInd +enum RMWStatus { + STOREIND_RMW_STATUS_UNKNOWN, // RMW status of storeInd unknown + // Default status unless modified by IsRMWMemOpRootedAtStoreInd() + + // One of these denote storeind is a RMW memory operation. + STOREIND_RMW_DST_IS_OP1, // StoreInd is known to be a RMW memory op and dst candidate is op1 + STOREIND_RMW_DST_IS_OP2, // StoreInd is known to be a RMW memory op and dst candidate is op2 + + // One of these denote the reason for storeind is marked as non-RMW operation + STOREIND_RMW_UNSUPPORTED_ADDR, // Addr mode is not yet supported for RMW memory + STOREIND_RMW_UNSUPPORTED_OPER, // Operation is not supported for RMW memory + STOREIND_RMW_UNSUPPORTED_TYPE, // Type is not supported for RMW memory + STOREIND_RMW_INDIR_UNEQUAL // Indir to read value is not equivalent to indir that writes the value +}; + +// StoreInd is just a BinOp, with additional RMW status struct GenTreeStoreInd: public GenTreeIndir { +#if !CPU_LOAD_STORE_ARCH + // The below flag is set and used during lowering + RMWStatus gtRMWStatus; + + bool IsRMWStatusUnknown() { return gtRMWStatus == STOREIND_RMW_STATUS_UNKNOWN; } + bool IsNonRMWMemoryOp() { + return gtRMWStatus == STOREIND_RMW_UNSUPPORTED_ADDR || + gtRMWStatus == STOREIND_RMW_UNSUPPORTED_OPER || + gtRMWStatus == STOREIND_RMW_UNSUPPORTED_TYPE || + gtRMWStatus == STOREIND_RMW_INDIR_UNEQUAL; + } + bool IsRMWMemoryOp() { return gtRMWStatus == STOREIND_RMW_DST_IS_OP1 || gtRMWStatus == STOREIND_RMW_DST_IS_OP2; } + bool IsRMWDstOp1() { return gtRMWStatus == STOREIND_RMW_DST_IS_OP1; } + bool IsRMWDstOp2() { return gtRMWStatus == STOREIND_RMW_DST_IS_OP2; } +#endif //!CPU_LOAD_STORE_ARCH + + RMWStatus GetRMWStatus() { +#if !CPU_LOAD_STORE_ARCH + return gtRMWStatus; +#else + return STOREIND_RMW_STATUS_UNKNOWN; +#endif + } + + void SetRMWStatusDefault() + { +#if !CPU_LOAD_STORE_ARCH + gtRMWStatus = STOREIND_RMW_STATUS_UNKNOWN; +#endif + } + + void SetRMWStatus(RMWStatus status) + { +#if !CPU_LOAD_STORE_ARCH + gtRMWStatus = status; +#endif + } + GenTreePtr& Data() { return gtOp2; } GenTreeStoreInd(var_types type, GenTree *destPtr, GenTree *data) : GenTreeIndir(GT_STOREIND, type, destPtr, data) { + SetRMWStatusDefault(); } #if DEBUGGABLE_GENTREE protected: friend GenTree; // Used only for GenTree::GetVtableForOper() - GenTreeStoreInd() : GenTreeIndir() {} + GenTreeStoreInd() : GenTreeIndir() { SetRMWStatusDefault(); } #endif }; @@ 
-3042,9 +3139,7 @@ struct GenTreeRetExpr: public GenTree { GenTreePtr gtInlineCandidate; -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) CORINFO_CLASS_HANDLE gtRetClsHnd; -#endif GenTreeRetExpr(var_types type) : GenTree(GT_RET_EXPR, type) diff --git a/src/jit/gschecks.cpp b/src/jit/gschecks.cpp index fe9784ce57..9f85b557a0 100644 --- a/src/jit/gschecks.cpp +++ b/src/jit/gschecks.cpp @@ -397,6 +397,7 @@ void Compiler::gsParamsToShadows() for (UINT lclNum = 0; lclNum < lvaOldCount; lclNum++) { LclVarDsc *varDsc = &lvaTable[lclNum]; + gsShadowVarInfo[lclNum].shadowCopy = NO_SHADOW_COPY; // Only care about params whose values are on the stack if (!ShadowParamVarInfo::mayNeedShadowCopy(varDsc)) @@ -405,8 +406,7 @@ void Compiler::gsParamsToShadows() } if (!varDsc->lvIsPtr && !varDsc->lvIsUnsafeBuffer) - { - gsShadowVarInfo[lclNum].shadowCopy = NO_SHADOW_COPY; + { continue; } @@ -447,11 +447,6 @@ void Compiler::gsParamsToShadows() { LclVarDsc *varDsc = &lvaTable[lclNum]; - if (!ShadowParamVarInfo::mayNeedShadowCopy(varDsc)) - { - continue; - } - unsigned shadowVar = gsShadowVarInfo[lclNum].shadowCopy; if (shadowVar == NO_SHADOW_COPY) { @@ -487,6 +482,60 @@ void Compiler::gsParamsToShadows() fgEnsureFirstBBisScratch(); (void) fgInsertStmtAtBeg(fgFirstBB, fgMorphTree(opAssign)); } + + // If the method has "Jmp CalleeMethod", then we need to copy shadow params back to original + // params before "jmp" to CalleeMethod. + if (compJmpOpUsed) + { + // There could be more than one basic block ending with a "Jmp" type tail call. + // We would have to insert assignments in all such blocks, just before GT_JMP stmnt. + for (BasicBlock * block = fgFirstBB; block; block = block->bbNext) + { + if (block->bbJumpKind != BBJ_RETURN) + { + continue; + } + + if ((block->bbFlags & BBF_HAS_JMP) == 0) + { + continue; + } + + for (UINT lclNum = 0; lclNum < info.compArgsCount; lclNum++) + { + LclVarDsc *varDsc = &lvaTable[lclNum]; + + unsigned shadowVar = gsShadowVarInfo[lclNum].shadowCopy; + if (shadowVar == NO_SHADOW_COPY) + { + continue; + } + + GenTreePtr src = gtNewLclvNode(shadowVar, lvaTable[shadowVar].TypeGet()); + GenTreePtr dst = gtNewLclvNode(lclNum, varDsc->TypeGet()); + + src->gtFlags |= GTF_DONT_CSE; + dst->gtFlags |= GTF_DONT_CSE; + + GenTreePtr opAssign = nullptr; + if (varDsc->TypeGet() == TYP_STRUCT) + { + CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + src = gtNewOperNode(GT_ADDR, TYP_BYREF, src); + dst = gtNewOperNode(GT_ADDR, TYP_BYREF, dst); + + opAssign = gtNewCpObjNode(dst, src, clsHnd, false); + } + else + { + opAssign = gtNewAssignNode(dst, src); + } + + (void) fgInsertStmtNearEnd(block, fgMorphTree(opAssign)); + } + + } + } } diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index a23d8ad579..91954c2732 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -14,74 +14,71 @@ // ,commutative // ,operKind -GTNODE(GT_NONE , "<none>" ,0,GTK_SPECIAL) +GTNODE(NONE , "<none>" ,0,GTK_SPECIAL) //----------------------------------------------------------------------------- // Leaf nodes (i.e. 
these nodes have no sub-operands): //----------------------------------------------------------------------------- -GTNODE(GT_LCL_VAR , "lclVar" ,0,GTK_LEAF|GTK_LOCAL) // local variable -GTNODE(GT_LCL_FLD , "lclFld" ,0,GTK_LEAF|GTK_LOCAL) // field in a non-primitive variable -GTNODE(GT_LCL_VAR_ADDR , "&lclVar" ,0,GTK_LEAF) // address of local variable -GTNODE(GT_LCL_FLD_ADDR , "&lclFld" ,0,GTK_LEAF) // address of field in a non-primitive variable -GTNODE(GT_STORE_LCL_VAR , "st.lclVar" ,0,GTK_UNOP|GTK_LOCAL) // store to local variable -GTNODE(GT_STORE_LCL_FLD , "st.lclFld" ,0,GTK_UNOP|GTK_LOCAL) // store to field in a non-primitive variable -GTNODE(GT_CATCH_ARG , "catchArg" ,0,GTK_LEAF) // Exception object in a catch block -GTNODE(GT_LABEL , "codeLabel" ,0,GTK_LEAF) // Jump-target -GTNODE(GT_FTN_ADDR , "ftnAddr" ,0,GTK_LEAF) // Address of a function -GTNODE(GT_RET_EXPR , "retExpr" ,0,GTK_LEAF) // Place holder for the return expression from an inline candidate +GTNODE(LCL_VAR , "lclVar" ,0,GTK_LEAF|GTK_LOCAL) // local variable +GTNODE(LCL_FLD , "lclFld" ,0,GTK_LEAF|GTK_LOCAL) // field in a non-primitive variable +GTNODE(LCL_VAR_ADDR , "&lclVar" ,0,GTK_LEAF) // address of local variable +GTNODE(LCL_FLD_ADDR , "&lclFld" ,0,GTK_LEAF) // address of field in a non-primitive variable +GTNODE(STORE_LCL_VAR , "st.lclVar" ,0,GTK_UNOP|GTK_LOCAL) // store to local variable +GTNODE(STORE_LCL_FLD , "st.lclFld" ,0,GTK_UNOP|GTK_LOCAL) // store to field in a non-primitive variable +GTNODE(CATCH_ARG , "catchArg" ,0,GTK_LEAF) // Exception object in a catch block +GTNODE(LABEL , "codeLabel" ,0,GTK_LEAF) // Jump-target +GTNODE(FTN_ADDR , "ftnAddr" ,0,GTK_LEAF) // Address of a function +GTNODE(RET_EXPR , "retExpr" ,0,GTK_LEAF) // Place holder for the return expression from an inline candidate //----------------------------------------------------------------------------- // Constant nodes: //----------------------------------------------------------------------------- -GTNODE(GT_CNS_INT , "const" ,0,GTK_LEAF|GTK_CONST) -GTNODE(GT_CNS_LNG , "lconst" ,0,GTK_LEAF|GTK_CONST) -GTNODE(GT_CNS_DBL , "dconst" ,0,GTK_LEAF|GTK_CONST) -GTNODE(GT_CNS_STR , "sconst" ,0,GTK_LEAF|GTK_CONST) +GTNODE(CNS_INT , "const" ,0,GTK_LEAF|GTK_CONST) +GTNODE(CNS_LNG , "lconst" ,0,GTK_LEAF|GTK_CONST) +GTNODE(CNS_DBL , "dconst" ,0,GTK_LEAF|GTK_CONST) +GTNODE(CNS_STR , "sconst" ,0,GTK_LEAF|GTK_CONST) //----------------------------------------------------------------------------- // Unary operators (1 operand): //----------------------------------------------------------------------------- -GTNODE(GT_NOT , "~" ,0,GTK_UNOP) -GTNODE(GT_NOP , "nop" ,0,GTK_UNOP) -GTNODE(GT_NEG , "unary -" ,0,GTK_UNOP) -GTNODE(GT_COPY , "copy" ,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies +GTNODE(NOT , "~" ,0,GTK_UNOP) +GTNODE(NOP , "nop" ,0,GTK_UNOP) +GTNODE(NEG , "unary -" ,0,GTK_UNOP) +GTNODE(COPY , "copy" ,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies // code generation constraints. The child is the actual lclVar node. -GTNODE(GT_RELOAD , "reload" ,0,GTK_UNOP) -GTNODE(GT_CHS , "flipsign" ,0,GTK_BINOP|GTK_ASGOP) // GT_CHS is actually unary -- op2 is ignored. +GTNODE(RELOAD , "reload" ,0,GTK_UNOP) +GTNODE(CHS , "flipsign" ,0,GTK_BINOP|GTK_ASGOP) // GT_CHS is actually unary -- op2 is ignored. // Changing to unary presently causes problems, though -- take a little work to fix. 
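The RMWStatus values and the gtRMWStatus field added to GenTreeStoreInd above reduce to a small classification question asked during lowering: for a store of the form *addr = <binop>, does the binop read back the very location being stored (so the whole tree can become one x86 read-modify-write instruction such as add [mem], reg), and if so, is that read op1 or op2 of the binop? The sketch below is a self-contained illustration of that decision using invented stand-in types (Node, SameIndirection); it is not the JIT's actual address-mode comparison.

#include <cassert>

// Invented stand-ins for the JIT's IR nodes; illustrative only.
enum Oper { OP_IND, OP_ADD, OP_SUB, OP_LEAF };
struct Node { Oper oper; Node* op1; Node* op2; };

enum RMWStatus {
    RMW_UNKNOWN,            // not yet classified
    RMW_DST_IS_OP1,         // the binop reads the stored location through its op1
    RMW_DST_IS_OP2,         // the binop reads the stored location through its op2
    RMW_UNSUPPORTED_OPER,   // the source is not an operation we can fold into RMW form
    RMW_INDIR_UNEQUAL       // the load does not address the same location as the store
};

// Hypothetical equality check; the real JIT compares whole address modes.
static bool SameIndirection(const Node* ind, const Node* storeAddr)
{
    return ind != nullptr && ind->oper == OP_IND && ind->op1 == storeAddr;
}

// Classify "*storeAddr = src", the kind of verdict gtRMWStatus caches.
static RMWStatus ClassifyRMW(Node* storeAddr, const Node* src)
{
    if (src->oper != OP_ADD && src->oper != OP_SUB)
        return RMW_UNSUPPORTED_OPER;
    if (SameIndirection(src->op1, storeAddr))
        return RMW_DST_IS_OP1;                    // e.g. *p = *p + x  ->  add [p], x
    if (src->oper == OP_ADD && SameIndirection(src->op2, storeAddr))
        return RMW_DST_IS_OP2;                    // e.g. *p = x + *p (commutative ops only)
    return RMW_INDIR_UNEQUAL;
}

int main()
{
    Node addr = { OP_LEAF, nullptr, nullptr };
    Node load = { OP_IND,  &addr,   nullptr };
    Node one  = { OP_LEAF, nullptr, nullptr };
    Node add  = { OP_ADD,  &load,   &one    };
    assert(ClassifyRMW(&addr, &add) == RMW_DST_IS_OP1);
    return 0;
}

Caching the verdict on the node, as the patch does, means later queries during lowering do not have to repeat the analysis.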
-GTNODE(GT_ARR_LENGTH , "arrLen" ,0,GTK_UNOP|GTK_EXOP) // array-length +GTNODE(ARR_LENGTH , "arrLen" ,0,GTK_UNOP|GTK_EXOP) // array-length -#if INLINE_MATH -GTNODE(GT_MATH , "mathFN" ,0,GTK_BINOP|GTK_EXOP) // Math functions/operators/intrinsics -#endif +GTNODE(INTRINSIC , "intrinsic" ,0,GTK_BINOP|GTK_EXOP) // intrinsics - //Interlocked intrinsics -GTNODE(GT_LOCKADD , "lockAdd" ,0,GTK_BINOP) -GTNODE(GT_XADD , "XAdd" ,0,GTK_BINOP) -GTNODE(GT_XCHG , "Xchg" ,0,GTK_BINOP) -GTNODE(GT_CMPXCHG , "cmpxchg" ,0,GTK_SPECIAL) -GTNODE(GT_MEMORYBARRIER , "memoryBarrier" ,0,GTK_LEAF) +GTNODE(LOCKADD , "lockAdd" ,0,GTK_BINOP) +GTNODE(XADD , "XAdd" ,0,GTK_BINOP) +GTNODE(XCHG , "Xchg" ,0,GTK_BINOP) +GTNODE(CMPXCHG , "cmpxchg" ,0,GTK_SPECIAL) +GTNODE(MEMORYBARRIER , "memoryBarrier" ,0,GTK_LEAF) -GTNODE(GT_CAST , "cast" ,0,GTK_UNOP|GTK_EXOP) // conversion to another type -GTNODE(GT_CKFINITE , "ckfinite" ,0,GTK_UNOP) // Check for NaN -GTNODE(GT_LCLHEAP , "lclHeap" ,0,GTK_UNOP) // alloca() -GTNODE(GT_JMP , "jump" ,0,GTK_LEAF) // Jump to another function +GTNODE(CAST , "cast" ,0,GTK_UNOP|GTK_EXOP) // conversion to another type +GTNODE(CKFINITE , "ckfinite" ,0,GTK_UNOP) // Check for NaN +GTNODE(LCLHEAP , "lclHeap" ,0,GTK_UNOP) // alloca() +GTNODE(JMP , "jump" ,0,GTK_LEAF) // Jump to another function -GTNODE(GT_ADDR , "addr" ,0,GTK_UNOP) // address of -GTNODE(GT_IND , "indir" ,0,GTK_UNOP) // load indirection -GTNODE(GT_STOREIND , "storeIndir" ,0,GTK_BINOP) // store indirection +GTNODE(ADDR , "addr" ,0,GTK_UNOP) // address of +GTNODE(IND , "indir" ,0,GTK_UNOP) // load indirection +GTNODE(STOREIND , "storeIndir" ,0,GTK_BINOP) // store indirection // TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes -GTNODE(GT_ARR_BOUNDS_CHECK , "arrBndsChk" ,0,GTK_SPECIAL) // array bounds check -GTNODE(GT_LDOBJ , "ldobj" ,0,GTK_UNOP|GTK_EXOP) -GTNODE(GT_BOX , "box" ,0,GTK_UNOP|GTK_EXOP) +GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,0,GTK_SPECIAL) // array bounds check +GTNODE(LDOBJ , "ldobj" ,0,GTK_UNOP|GTK_EXOP) +GTNODE(BOX , "box" ,0,GTK_UNOP|GTK_EXOP) #ifdef FEATURE_SIMD -GTNODE(GT_SIMD_CHK , "simdChk" ,0,GTK_SPECIAL) // Compare whether an index is less than the given SIMD vector length, and call CORINFO_HELP_RNGCHKFAIL if not. +GTNODE(SIMD_CHK , "simdChk" ,0,GTK_SPECIAL) // Compare whether an index is less than the given SIMD vector length, and call CORINFO_HELP_RNGCHKFAIL if not. // TODO-CQ: In future may want to add a field that specifies different exceptions but we'll // need VM assistance for that. 
// TODO-CQ: It would actually be very nice to make this an unconditional throw, and expose the control flow that @@ -92,149 +89,158 @@ GTNODE(GT_SIMD_CHK , "simdChk" ,0,GTK_SPECIAL) // Compare wh // Binary operators (2 operands): //----------------------------------------------------------------------------- -GTNODE(GT_ADD , "+" ,1,GTK_BINOP) -GTNODE(GT_SUB , "-" ,0,GTK_BINOP) -GTNODE(GT_MUL , "*" ,1,GTK_BINOP) -GTNODE(GT_DIV , "/" ,0,GTK_BINOP) -GTNODE(GT_MOD , "%" ,0,GTK_BINOP) +GTNODE(ADD , "+" ,1,GTK_BINOP) +GTNODE(SUB , "-" ,0,GTK_BINOP) +GTNODE(MUL , "*" ,1,GTK_BINOP) +GTNODE(DIV , "/" ,0,GTK_BINOP) +GTNODE(MOD , "%" ,0,GTK_BINOP) -GTNODE(GT_UDIV , "/" ,0,GTK_BINOP) -GTNODE(GT_UMOD , "%" ,0,GTK_BINOP) +GTNODE(UDIV , "/" ,0,GTK_BINOP) +GTNODE(UMOD , "%" ,0,GTK_BINOP) -GTNODE(GT_OR , "|" ,1,GTK_BINOP|GTK_LOGOP) -GTNODE(GT_XOR , "^" ,1,GTK_BINOP|GTK_LOGOP) -GTNODE(GT_AND , "&" ,1,GTK_BINOP|GTK_LOGOP) +GTNODE(OR , "|" ,1,GTK_BINOP|GTK_LOGOP) +GTNODE(XOR , "^" ,1,GTK_BINOP|GTK_LOGOP) +GTNODE(AND , "&" ,1,GTK_BINOP|GTK_LOGOP) -GTNODE(GT_LSH , "<<" ,0,GTK_BINOP) -GTNODE(GT_RSH , ">>" ,0,GTK_BINOP) -GTNODE(GT_RSZ , ">>>" ,0,GTK_BINOP) -GTNODE(GT_ROL , "rol" ,0,GTK_BINOP) -GTNODE(GT_ROR , "ror" ,0,GTK_BINOP) -GTNODE(GT_MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply) +GTNODE(LSH , "<<" ,0,GTK_BINOP) +GTNODE(RSH , ">>" ,0,GTK_BINOP) +GTNODE(RSZ , ">>>" ,0,GTK_BINOP) +GTNODE(ROL , "rol" ,0,GTK_BINOP) +GTNODE(ROR , "ror" ,0,GTK_BINOP) +GTNODE(MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply) -GTNODE(GT_ASG , "=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_ADD , "+=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_SUB , "-=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_MUL , "*=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_DIV , "/=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_MOD , "%=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG , "=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_ADD , "+=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_SUB , "-=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_MUL , "*=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_DIV , "/=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_MOD , "%=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_UDIV , "/=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_UMOD , "%=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_UDIV , "/=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_UMOD , "%=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_OR , "|=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_XOR , "^=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_AND , "&=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_LSH , "<<=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_RSH , ">>=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_ASG_RSZ , ">>>=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_OR , "|=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_XOR , "^=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_AND , "&=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_LSH , "<<=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_RSH , ">>=" ,0,GTK_BINOP|GTK_ASGOP) +GTNODE(ASG_RSZ , ">>>=" ,0,GTK_BINOP|GTK_ASGOP) -GTNODE(GT_EQ , "==" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(GT_NE , "!=" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(GT_LT , "<" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(GT_LE , "<=" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(GT_GE , ">=" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(GT_GT , ">" ,0,GTK_BINOP|GTK_RELOP) +GTNODE(EQ , "==" ,0,GTK_BINOP|GTK_RELOP) +GTNODE(NE , "!=" ,0,GTK_BINOP|GTK_RELOP) +GTNODE(LT , "<" ,0,GTK_BINOP|GTK_RELOP) +GTNODE(LE , "<=" ,0,GTK_BINOP|GTK_RELOP) +GTNODE(GE , ">=" ,0,GTK_BINOP|GTK_RELOP) +GTNODE(GT , ">" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(GT_COMMA , "comma" ,0,GTK_BINOP) +GTNODE(COMMA , 
"comma" ,0,GTK_BINOP) -GTNODE(GT_QMARK , "qmark" ,0,GTK_BINOP|GTK_EXOP) -GTNODE(GT_COLON , "colon" ,0,GTK_BINOP) +GTNODE(QMARK , "qmark" ,0,GTK_BINOP|GTK_EXOP) +GTNODE(COLON , "colon" ,0,GTK_BINOP) -GTNODE(GT_INDEX , "[]" ,0,GTK_BINOP|GTK_EXOP) // SZ-array-element +GTNODE(INDEX , "[]" ,0,GTK_BINOP|GTK_EXOP) // SZ-array-element -GTNODE(GT_MKREFANY , "mkrefany" ,0,GTK_BINOP) +GTNODE(MKREFANY , "mkrefany" ,0,GTK_BINOP) -GTNODE(GT_LEA , "lea" ,0,GTK_BINOP|GTK_EXOP) +GTNODE(LEA , "lea" ,0,GTK_BINOP|GTK_EXOP) #if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) // A GT_LONG node simply represents the long value produced by the concatenation // of its two (lower and upper half) operands. Some GT_LONG nodes are transient, // during the decomposing of longs; others are handled by codegen as operands of // nodes such as calls, returns and stores of long lclVars. -GTNODE(GT_LONG , "long" ,0,GTK_BINOP) +GTNODE(LONG , "long" ,0,GTK_BINOP) + +// The following are nodes representing the upper half of a 64-bit operation +// that requires a carry/borrow. However, they are all named GT_XXX_HI for +// consistency. +GTNODE(ADD_HI , "+Hi" ,1,GTK_BINOP|GTK_EXOP) +GTNODE(SUB_HI , "-Hi" ,0,GTK_BINOP|GTK_EXOP) +GTNODE(MUL_HI , "*Hi" ,1,GTK_BINOP|GTK_EXOP) +GTNODE(DIV_HI , "/Hi" ,0,GTK_BINOP|GTK_EXOP) +GTNODE(MOD_HI , "%Hi" ,0,GTK_BINOP|GTK_EXOP) #endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) #ifdef FEATURE_SIMD -GTNODE(GT_SIMD , "simd" ,0,GTK_BINOP|GTK_EXOP) // SIMD functions/operators/intrinsics +GTNODE(SIMD , "simd" ,0,GTK_BINOP|GTK_EXOP) // SIMD functions/operators/intrinsics #endif // FEATURE_SIMD //----------------------------------------------------------------------------- // Other nodes that look like unary/binary operators: //----------------------------------------------------------------------------- -GTNODE(GT_JTRUE , "jmpTrue" ,0,GTK_UNOP) +GTNODE(JTRUE , "jmpTrue" ,0,GTK_UNOP) -GTNODE(GT_LIST , "<list>" ,0,GTK_BINOP) +GTNODE(LIST , "<list>" ,0,GTK_BINOP) //----------------------------------------------------------------------------- // Other nodes that have special structure: //----------------------------------------------------------------------------- -GTNODE(GT_FIELD , "field" ,0,GTK_SPECIAL) // Member-field -GTNODE(GT_ARR_ELEM , "arrMD&" ,0,GTK_SPECIAL) // Multi-dimensional array-element address -GTNODE(GT_ARR_INDEX , "arrMDIdx" ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element -GTNODE(GT_ARR_OFFSET , "arrMDOffs" ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element -GTNODE(GT_CALL , "call()" ,0,GTK_SPECIAL) +GTNODE(FIELD , "field" ,0,GTK_SPECIAL) // Member-field +GTNODE(ARR_ELEM , "arrMD&" ,0,GTK_SPECIAL) // Multi-dimensional array-element address +GTNODE(ARR_INDEX , "arrMDIdx" ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element +GTNODE(ARR_OFFSET , "arrMDOffs" ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element +GTNODE(CALL , "call()" ,0,GTK_SPECIAL) //----------------------------------------------------------------------------- // Statement operator nodes: //----------------------------------------------------------------------------- -GTNODE(GT_BEG_STMTS , "begStmts" ,0,GTK_SPECIAL) // used only temporarily in importer by impBegin/EndTreeList() -GTNODE(GT_STMT , "stmtExpr" ,0,GTK_SPECIAL) // top-level list nodes in bbTreeList +GTNODE(BEG_STMTS , "begStmts" ,0,GTK_SPECIAL) // used only temporarily in importer by 
impBegin/EndTreeList() +GTNODE(STMT , "stmtExpr" ,0,GTK_SPECIAL) // top-level list nodes in bbTreeList -GTNODE(GT_RETURN , "return" ,0,GTK_UNOP) // return from current function -GTNODE(GT_SWITCH , "switch" ,0,GTK_UNOP) // switch +GTNODE(RETURN , "return" ,0,GTK_UNOP) // return from current function +GTNODE(SWITCH , "switch" ,0,GTK_UNOP) // switch -GTNODE(GT_NO_OP , "no_op" ,0,GTK_LEAF) // nop! +GTNODE(NO_OP , "no_op" ,0,GTK_LEAF) // nop! -GTNODE(GT_START_NONGC, "start_nongc",0,GTK_LEAF) // starts a new instruction group that will be non-gc interruptible +GTNODE(START_NONGC, "start_nongc",0,GTK_LEAF) // starts a new instruction group that will be non-gc interruptible -GTNODE(GT_PROF_HOOK , "prof_hook" ,0,GTK_LEAF) // profiler Enter/Leave/TailCall hook +GTNODE(PROF_HOOK , "prof_hook" ,0,GTK_LEAF) // profiler Enter/Leave/TailCall hook -GTNODE(GT_RETFILT , "retfilt", 0,GTK_UNOP) // end filter with TYP_I_IMPL return value +GTNODE(RETFILT , "retfilt", 0,GTK_UNOP) // end filter with TYP_I_IMPL return value #if !FEATURE_EH_FUNCLETS -GTNODE(GT_END_LFIN , "endLFin" ,0,GTK_LEAF) // end locally-invoked finally +GTNODE(END_LFIN , "endLFin" ,0,GTK_LEAF) // end locally-invoked finally #endif // !FEATURE_EH_FUNCLETS -GTNODE(GT_INITBLK , "initBlk" ,0,GTK_BINOP) -GTNODE(GT_COPYBLK , "copyBlk" ,0,GTK_BINOP) -GTNODE(GT_COPYOBJ , "copyObj" ,0,GTK_BINOP) +GTNODE(INITBLK , "initBlk" ,0,GTK_BINOP) +GTNODE(COPYBLK , "copyBlk" ,0,GTK_BINOP) +GTNODE(COPYOBJ , "copyObj" ,0,GTK_BINOP) //----------------------------------------------------------------------------- // Nodes used for optimizations. //----------------------------------------------------------------------------- -GTNODE(GT_PHI , "phi" ,0,GTK_UNOP) // phi node for ssa. -GTNODE(GT_PHI_ARG , "phiArg" ,0,GTK_LEAF|GTK_LOCAL) // phi(phiarg, phiarg, phiarg) +GTNODE(PHI , "phi" ,0,GTK_UNOP) // phi node for ssa. 
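The new ADD_HI/SUB_HI/MUL_HI/DIV_HI/MOD_HI entries above give 32-bit targets a way to express the upper half of a decomposed 64-bit operation, with the low half producing the carry or borrow that the high half consumes. As a worked example of the arithmetic such a node pair stands for (plain C++, not the JIT's lowering code), a 64-bit add splits like this:

#include <cassert>
#include <cstdint>

// Decompose a 64-bit add into two 32-bit halves, the way a GT_LONG operand pair
// would be handled: the low add produces a carry, the high half consumes it.
static uint64_t Add64Via32(uint32_t aLo, uint32_t aHi, uint32_t bLo, uint32_t bHi)
{
    uint32_t lo    = aLo + bLo;          // maps to the low GT_ADD
    uint32_t carry = (lo < aLo) ? 1 : 0; // carry out of the low half
    uint32_t hi    = aHi + bHi + carry;  // maps to GT_ADD_HI (add with carry)
    return ((uint64_t)hi << 32) | lo;
}

int main()
{
    uint64_t a = 0x00000001FFFFFFFFull;
    uint64_t b = 0x0000000000000001ull;
    assert(Add64Via32((uint32_t)a, (uint32_t)(a >> 32),
                      (uint32_t)b, (uint32_t)(b >> 32)) == a + b);
    return 0;
}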
+GTNODE(PHI_ARG , "phiArg" ,0,GTK_LEAF|GTK_LOCAL) // phi(phiarg, phiarg, phiarg) //----------------------------------------------------------------------------- // Nodes used by Lower to generate a closer CPU representation of other nodes //----------------------------------------------------------------------------- -GTNODE(GT_JMPTABLE , "jumpTable" , 0, GTK_LEAF) // Generates the jump table for switches -GTNODE(GT_SWITCH_TABLE, "tableSwitch", 0, GTK_BINOP) // Jump Table based switch construct +GTNODE(JMPTABLE , "jumpTable" , 0, GTK_LEAF) // Generates the jump table for switches +GTNODE(SWITCH_TABLE, "tableSwitch", 0, GTK_BINOP) // Jump Table based switch construct //----------------------------------------------------------------------------- // Nodes used only within the code generator: //----------------------------------------------------------------------------- -GTNODE(GT_REG_VAR , "regVar" ,0,GTK_LEAF|GTK_LOCAL) // register variable -GTNODE(GT_CLS_VAR , "clsVar" ,0,GTK_LEAF) // static data member -GTNODE(GT_CLS_VAR_ADDR , "&clsVar" ,0,GTK_LEAF) // static data member address -GTNODE(GT_STORE_CLS_VAR, "st.clsVar" ,0,GTK_LEAF) // store to static data member -GTNODE(GT_ARGPLACE , "argPlace" ,0,GTK_LEAF) // placeholder for a register arg -GTNODE(GT_NULLCHECK , "nullcheck" ,0,GTK_UNOP) // null checks the source -GTNODE(GT_PHYSREG , "physregSrc" ,0,GTK_LEAF) // read from a physical register -GTNODE(GT_PHYSREGDST , "physregDst" ,0,GTK_UNOP) // write to a physical register -GTNODE(GT_EMITNOP , "emitnop" ,0,GTK_LEAF) // emitter-placed nop -GTNODE(GT_PINVOKE_PROLOG,"pinvoke_prolog",0,GTK_LEAF) // pinvoke prolog seq -GTNODE(GT_PINVOKE_EPILOG,"pinvoke_epilog",0,GTK_LEAF) // pinvoke epilog seq -GTNODE(GT_PUTARG_REG , "putarg_reg" ,0,GTK_UNOP) // operator that places outgoing arg in register -GTNODE(GT_PUTARG_STK , "putarg_stk" ,0,GTK_UNOP) // operator that places outgoing arg in stack -GTNODE(GT_RETURNTRAP , "returnTrap" ,0,GTK_UNOP) // a conditional call to wait on gc -GTNODE(GT_SWAP , "swap" ,0,GTK_BINOP) // op1 and op2 swap (registers) +GTNODE(REG_VAR , "regVar" ,0,GTK_LEAF|GTK_LOCAL) // register variable +GTNODE(CLS_VAR , "clsVar" ,0,GTK_LEAF) // static data member +GTNODE(CLS_VAR_ADDR , "&clsVar" ,0,GTK_LEAF) // static data member address +GTNODE(STORE_CLS_VAR, "st.clsVar" ,0,GTK_LEAF) // store to static data member +GTNODE(ARGPLACE , "argPlace" ,0,GTK_LEAF) // placeholder for a register arg +GTNODE(NULLCHECK , "nullcheck" ,0,GTK_UNOP) // null checks the source +GTNODE(PHYSREG , "physregSrc" ,0,GTK_LEAF) // read from a physical register +GTNODE(PHYSREGDST , "physregDst" ,0,GTK_UNOP) // write to a physical register +GTNODE(EMITNOP , "emitnop" ,0,GTK_LEAF) // emitter-placed nop +GTNODE(PINVOKE_PROLOG,"pinvoke_prolog",0,GTK_LEAF) // pinvoke prolog seq +GTNODE(PINVOKE_EPILOG,"pinvoke_epilog",0,GTK_LEAF) // pinvoke epilog seq +GTNODE(PUTARG_REG , "putarg_reg" ,0,GTK_UNOP) // operator that places outgoing arg in register +GTNODE(PUTARG_STK , "putarg_stk" ,0,GTK_UNOP) // operator that places outgoing arg in stack +GTNODE(RETURNTRAP , "returnTrap" ,0,GTK_UNOP) // a conditional call to wait on gc +GTNODE(SWAP , "swap" ,0,GTK_BINOP) // op1 and op2 swap (registers) /*****************************************************************************/ #undef GTNODE diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h index 6cc35b6285..19a6b655b4 100644 --- a/src/jit/gtstructs.h +++ b/src/jit/gtstructs.h @@ -68,7 +68,7 @@ GTSTRUCT_1(Call , GT_CALL) GTSTRUCT_1(ArgList , GT_LIST) GTSTRUCT_1(Colon , GT_COLON) 
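gtlist.h and gtstructs.h are X-macro tables: each GTNODE and GTSTRUCT_1 row is expanded by whatever definition of the macro the including file supplies (note the trailing #undef GTNODE above), which is why this change can drop the GT_ prefix from every row; presumably the macro now pastes the prefix back on. A minimal sketch of the pattern with an invented three-entry table, written as a single file with a table macro rather than a separate include, the way the real code does it:

#include <cstdio>

// A tiny stand-in for gtlist.h: node name, printable label, "commutative" flag.
#define TOY_NODE_TABLE(X) \
    X(ADD , "+"     , 1)  \
    X(SUB , "-"     , 0)  \
    X(CALL, "call()", 0)

// Expansion 1: build the operator enum, pasting the GT_ prefix back on.
enum ToyOper {
#define GTNODE(n, s, c) GT_##n,
    TOY_NODE_TABLE(GTNODE)
#undef GTNODE
    GT_COUNT
};

// Expansion 2: build a parallel table of printable names from the same rows.
static const char* const toyOperNames[] = {
#define GTNODE(n, s, c) s,
    TOY_NODE_TABLE(GTNODE)
#undef GTNODE
};

int main()
{
    std::printf("%s\n", toyOperNames[GT_SUB]);   // prints "-"
    return 0;
}

One table then drives the operator enum, the printable-name array, and any other per-node data that has to stay in sync.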
GTSTRUCT_1(FptrVal , GT_FTN_ADDR) -GTSTRUCT_1(Math , GT_MATH) +GTSTRUCT_1(Intrinsic , GT_INTRINSIC) GTSTRUCT_1(Index , GT_INDEX) #ifdef FEATURE_SIMD GTSTRUCT_2(BoundsChk , GT_ARR_BOUNDS_CHECK, GT_SIMD_CHK) diff --git a/src/jit/host.h b/src/jit/host.h index a2e143a4c9..e67d252732 100644 --- a/src/jit/host.h +++ b/src/jit/host.h @@ -28,8 +28,10 @@ private: BOOL vlogf(unsigned level, const char* fmt, va_list args); -void logf_stdout(const char* fmt, va_list args); -void logf(const char*, ...); +int logf_stdout(const char* fmt, va_list args); +int logf(const char*, ...); +void gcDump_logf(const char* fmt, ...); + void logf(unsigned level, const char* fmt, ...); extern "C" diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 5204abed6b..49564eaf66 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -792,21 +792,31 @@ void Compiler::impAssignTempGen(unsigned tmpNum, { GenTreePtr asg; - if (val->TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(val)) { - GenTreePtr dst = gtNewLclvNode(tmpNum, TYP_STRUCT); - assert(tmpNum < lvaCount); assert(structType != NO_CLASS_HANDLE); // if the method is non-verifiable the assert is not true // so at least ignore it in the case when verification is turned on - // since any block that tries to use the temp would have failed verification + // since any block that tries to use the temp would have failed verification. + var_types varType = lvaTable[tmpNum].lvType; assert(tiVerificationNeeded || - lvaTable[tmpNum].lvType == TYP_UNDEF || - lvaTable[tmpNum].lvType == TYP_STRUCT); + varType == TYP_UNDEF || + varTypeIsStruct(varType)); lvaSetStruct(tmpNum, structType, false); + + // Now, set the type of the struct value. Note that lvaSetStruct may modify the type + // of the lclVar to a specialized type (e.g. TYP_SIMD), based on the handle (structType) + // that has been passed in for the value being assigned to the temp, in which case we + // need to set 'val' to that same type. + // Note also that if we always normalized the types of any node that might be a struct + // type, this would not be necessary - but that requires additional JIT/EE interface + // calls that may not actually be required - e.g. if we only access a field of a struct. + + val->gtType = lvaTable[tmpNum].lvType; + GenTreePtr dst = gtNewLclvNode(tmpNum, val->gtType); asg = impAssignStruct(dst, val, structType, curLevel, pAfterStmt, block); } else @@ -868,7 +878,7 @@ GenTreeArgList* Compiler::impPopList(unsigned count, typeInfo ti = se.seTypeInfo; GenTreePtr temp = se.val; - if (temp->TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(temp)) { // Morph trees that aren't already LDOBJs or MKREFANY to be LDOBJs assert(ti.IsType(TI_STRUCT)); @@ -933,7 +943,7 @@ GenTreeArgList* Compiler::impPopList(unsigned count, // Everett MC++ could generate IL with a mismatched valuetypes. It used to work with Everett JIT, // but it stopped working in Whidbey when we have started passing simple valuetypes as underlying primitive types. // We will try to adjust for this case here to avoid breaking customers code (see VSW 485789 for details). 
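The Math-to-Intrinsic rename visible here (GTSTRUCT_1(Intrinsic, GT_INTRINSIC)) pairs with the gtMethodHandle field added to GenTreeIntrinsic earlier in the diff: keeping the original method handle on the node is what lets an intrinsic the target cannot implement directly be turned back into an ordinary call later, as the importer comment below about rematerializing user calls in the rationalizer describes. A rough illustration of that idea with toy stand-ins for the real handle and node types:

#include <cstdio>

// Toy stand-ins; the real node stores a CorInfoIntrinsics id and a CORINFO_METHOD_HANDLE.
enum ToyIntrinsic { INTRIN_Sqrt, INTRIN_Cosh };
typedef const char* ToyMethodHandle;

struct ToyIntrinsicNode
{
    ToyIntrinsic    id;
    ToyMethodHandle method;   // kept on the node so it can be re-expanded into a call
};

// Invented policy for the sketch: pretend only Sqrt has a direct machine instruction.
static bool HasDirectInstruction(ToyIntrinsic id) { return id == INTRIN_Sqrt; }

static void Lower(const ToyIntrinsicNode& node)
{
    if (HasDirectInstruction(node.id))
        std::printf("emit a hardware instruction for intrinsic %d\n", (int)node.id);
    else
        std::printf("rematerialize as a call to %s\n", node.method);  // needs the handle
}

int main()
{
    ToyIntrinsicNode sqrtNode = { INTRIN_Sqrt, "System.Math.Sqrt" };
    ToyIntrinsicNode coshNode = { INTRIN_Cosh, "System.Math.Cosh" };
    Lower(sqrtNode);
    Lower(coshNode);
    return 0;
}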
- if (corType == CORINFO_TYPE_VALUECLASS && args->Current()->TypeGet() != TYP_STRUCT) + if (corType == CORINFO_TYPE_VALUECLASS && !varTypeIsStruct(args->Current())) { args->Current() = impNormStructVal(args->Current(), argRealClass, (unsigned)CHECK_SPILL_ALL, true); } @@ -950,7 +960,7 @@ GenTreeArgList* Compiler::impPopList(unsigned count, // Ensure that IL is half-way sane (what was pushed is what the function expected) var_types argType = genActualType(args->Current()->TypeGet()); unsigned argSize = genTypeSize(argType); - if (argType == TYP_STRUCT) + if (varTypeIsStruct(argType)) { if (args->Current()->gtOper == GT_LDOBJ) argSize = info.compCompHnd->getClassSize(args->Current()->gtLdObj.gtClass); @@ -1104,11 +1114,11 @@ GenTreePtr Compiler::impAssignStruct(GenTreePtr dest, BasicBlock * block /* = NULL */ ) { - assert(dest->TypeGet() == TYP_STRUCT); + assert(varTypeIsStruct(dest)); while (dest->gtOper == GT_COMMA) { - assert(dest->gtOp.gtOp2->gtType == TYP_STRUCT); // Second thing is the struct + assert(varTypeIsStruct(dest->gtOp.gtOp2)); // Second thing is the struct // Append all the op1 of GT_COMMA trees before we evaluate op2 of the GT_COMMA tree. if (pAfterStmt) @@ -1152,8 +1162,8 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, BasicBlock * block /* = NULL */ ) { - assert(src->TypeGet() == TYP_STRUCT || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF)); #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + assert(varTypeIsStruct(src) || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF)); // TODO-ARM-BUG: Does ARM need this? // TODO-ARM64-BUG: Does ARM64 need this? assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || @@ -1162,6 +1172,8 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA || src->gtOper == GT_ADDR || GenTree::OperIsSIMD(src->gtOper)); #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + assert(varTypeIsStruct(src)); + assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_LDOBJ || src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || @@ -1214,7 +1226,7 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, // This flag will let us choose the correct write barrier. dest->gtFlags |= GTF_IND_TGTANYWHERE; } - + return gtNewAssignNode(dest, src); } } @@ -1291,7 +1303,7 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, else if (src->gtOper == GT_COMMA) { // Second thing is the struct or it's address. - assert(src->gtOp.gtOp2->gtType == TYP_STRUCT || src->gtOp.gtOp2->gtType == TYP_BYREF); + assert(varTypeIsStruct(src->gtOp.gtOp2) || src->gtOp.gtOp2->gtType == TYP_BYREF); if (pAfterStmt) { * pAfterStmt = fgInsertStmtAfter(block, * pAfterStmt, gtNewStmt(src->gtOp.gtOp1, impCurStmtOffs)); @@ -1318,7 +1330,7 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, } /***************************************************************************** - Given TYP_STRUCT value, and the class handle for that structure, return + Given a struct value, and the class handle for that structure, return the expression for the address for that structure value. willDeref - does the caller guarantee to dereference the pointer. 
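Most of the importer edits in this region swap exact TYP_STRUCT comparisons for varTypeIsStruct(), because once SIMD support normalizes some struct handles to TYP_SIMD8/12/16/32 (see impNormStructType below), "is this a struct value" becomes a set-membership test rather than an equality test. A toy version of the predicate over an abbreviated type enum, just to make the difference concrete (the range check is this sketch's own convention, not the real macro):

#include <cassert>

// Abbreviated stand-in for the JIT's var_types; only the cases the predicate cares about.
enum var_types_t { TYP_INT, TYP_REF, TYP_STRUCT, TYP_SIMD8, TYP_SIMD12, TYP_SIMD16, TYP_SIMD32 };

// A struct-valued node is either a plain TYP_STRUCT or one of the SIMD struct types.
static bool varTypeIsStructSketch(var_types_t t)
{
    return (t == TYP_STRUCT) || (t >= TYP_SIMD8 && t <= TYP_SIMD32);
}

int main()
{
    assert(varTypeIsStructSketch(TYP_STRUCT));
    assert(varTypeIsStructSketch(TYP_SIMD16));   // a bare "== TYP_STRUCT" check misses this
    assert(!varTypeIsStructSketch(TYP_INT));
    return 0;
}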
@@ -1329,7 +1341,7 @@ GenTreePtr Compiler::impGetStructAddr(GenTreePtr structVal, unsigned curLevel, bool willDeref) { - assert(structVal->TypeGet() == TYP_STRUCT || eeIsValueClass(structHnd)); + assert(varTypeIsStruct(structVal) || eeIsValueClass(structHnd)); var_types type = structVal->TypeGet(); @@ -1348,8 +1360,9 @@ GenTreePtr Compiler::impGetStructAddr(GenTreePtr structVal, // The 'return value' is now the temp itself + type = genActualType(lvaTable[tmpNum].TypeGet()); GenTreePtr temp = gtNewLclvNode(tmpNum, type); - temp = gtNewOperNode(GT_ADDR, TYP_I_IMPL, temp); + temp = gtNewOperNode(GT_ADDR, TYP_BYREF, temp); return temp; } else if (oper == GT_COMMA) @@ -1376,17 +1389,174 @@ GenTreePtr Compiler::impGetStructAddr(GenTreePtr structVal, return(gtNewOperNode(GT_ADDR, TYP_BYREF, structVal)); } -/***************************************************************************** - * Given TYP_STRUCT value 'structVal', make certain it is 'canonical', that is - * it is either a LDOBJ or a MKREFANY node */ +//------------------------------------------------------------------------ +// impNormStructType: Given a (known to be) struct class handle structHnd, normalize its type, +// and optionally determine the GC layout of the struct. +// +// Arguments: +// structHnd - The class handle for the struct type of interest. +// gcLayout - (optional, default nullptr) - a BYTE pointer, allocated by the caller, +// into which the gcLayout will be written. +// pNumGCVars - (optional, default nullptr) - if non-null, a pointer to an unsigned, +// which will be set to the number of GC fields in the struct. +// +// Return Value: +// The JIT type for the struct (e.g. TYP_STRUCT, or TYP_SIMD*). +// The gcLayout will be returned using the pointers provided by the caller, if non-null. +// It may also modify the compFloatingPointUsed flag if the type is a SIMD type. +// +// Assumptions: +// The caller must set gcLayout to nullptr OR ensure that it is large enough +// (see ICorStaticInfo::getClassGClayout in corinfo.h). +// +// Notes: +// Normalizing the type involves examining the struct type to determine if it should +// be modified to one that is handled specially by the JIT, possibly being a candidate +// for full enregistration, e.g. TYP_SIMD16. + +var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd, + BYTE* gcLayout, + unsigned* pNumGCVars, + var_types* pSimdBaseType) +{ + assert(structHnd != NO_CLASS_HANDLE); + unsigned originalSize = info.compCompHnd->getClassSize(structHnd); + unsigned numGCVars = 0; + var_types structType = TYP_STRUCT; + var_types simdBaseType = TYP_UNKNOWN; + bool definitelyHasGCPtrs = false; + +#ifdef FEATURE_SIMD + // We don't want to consider this as a possible SIMD type if it has GC pointers. + // (Saves querying about the SIMD assembly.) + BYTE gcBytes[maxPossibleSIMDStructBytes / TARGET_POINTER_SIZE]; + if ((gcLayout == nullptr) && + (originalSize >= minSIMDStructBytes()) && + (originalSize <= maxSIMDStructBytes())) + { + gcLayout = gcBytes; + } +#endif // FEATURE_SIMD + if (gcLayout != nullptr) + { + numGCVars = info.compCompHnd->getClassGClayout(structHnd, gcLayout); + definitelyHasGCPtrs = (numGCVars != 0); + } +#ifdef FEATURE_SIMD + // Check to see if this is a SIMD type. 
+ if (featureSIMD && + (originalSize <= getSIMDVectorRegisterByteLength()) && + (originalSize >= TARGET_POINTER_SIZE) && + !definitelyHasGCPtrs) + { + unsigned int sizeBytes; + simdBaseType = getBaseTypeAndSizeOfSIMDType(structHnd, &sizeBytes); + if (simdBaseType != TYP_UNKNOWN) + { + assert(sizeBytes == originalSize); + structType = getSIMDTypeForSize(sizeBytes); + if (pSimdBaseType != nullptr) + { + *pSimdBaseType = simdBaseType; + } +#ifdef _TARGET_AMD64_ + // Amd64: also indicate that we use floating point registers + compFloatingPointUsed = true; +#endif + } + } +#endif //FEATURE_SIMD + if (pNumGCVars != nullptr) + { + *pNumGCVars = numGCVars; + } + return structType; +} + +//**************************************************************************** +// Given TYP_STRUCT value 'structVal', make sure it is 'canonical' +// is must be either a LDOBJ or a MKREFANY node +// GenTreePtr Compiler::impNormStructVal(GenTreePtr structVal, - CORINFO_CLASS_HANDLE structType, + CORINFO_CLASS_HANDLE structHnd, unsigned curLevel, bool forceNormalization /*=false*/) { - assert(forceNormalization || structVal->TypeGet() == TYP_STRUCT); - assert(structType != NO_CLASS_HANDLE); + assert(forceNormalization || varTypeIsStruct(structVal)); + assert(structHnd != NO_CLASS_HANDLE); + var_types structType = structVal->TypeGet(); + if (structType == TYP_STRUCT) + { + structType = impNormStructType(structHnd); + } + + genTreeOps oper = structVal->OperGet(); + switch (oper) + { + // GT_RETURN and GT_MKREFANY don't capture the handle. + case GT_RETURN: + case GT_MKREFANY: + break; + + case GT_CALL: + structVal->gtCall.gtRetClsHnd = structHnd; + structVal->gtType = structType; + break; + + case GT_RET_EXPR: + structVal->gtRetExpr.gtRetClsHnd = structHnd; + structVal->gtType = structType; + break; + + case GT_ARGPLACE: + structVal->gtArgPlace.gtArgPlaceClsHnd = structHnd; + structVal->gtType = structType; + break; + + case GT_IND: + structVal->gtType = structType; + break; + + case GT_INDEX: + structVal->gtIndex.gtStructElemClass = structHnd; + structVal->gtIndex.gtIndElemSize = info.compCompHnd->getClassSize(structHnd); + structVal->gtType = structType; + break; + + case GT_FIELD: + structVal->gtType = structType; + break; + + case GT_LCL_VAR: + case GT_LCL_FLD: + break; + + case GT_LDOBJ: + // These should already have the appropriate type. + assert(structVal->gtType == structType); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + // These don't preserve the handle. + assert(varTypeIsSIMD(structVal)); + break; +#endif // FEATURE_SIMD + + case GT_COMMA: + { + GenTree* op2 = structVal->gtOp.gtOp2; + impNormStructVal(op2, structHnd, curLevel, forceNormalization); + structType = op2->TypeGet(); + structVal->gtType = structType; + } + break; + + default: + assert(!"Unexpected node in impNormStructVal()"); + break; + } // Is it already normalized? 
if (!forceNormalization && (structVal->gtOper == GT_MKREFANY || structVal->gtOper == GT_LDOBJ)) @@ -1394,11 +1564,30 @@ GenTreePtr Compiler::impNormStructVal(GenTreePtr structVal, // Normalize it by wraping it in a LDOBJ - structVal = impGetStructAddr(structVal, structType, curLevel, !forceNormalization); // get the addr of struct - structVal = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, structVal, structType - ); - structVal->gtFlags |= GTF_EXCEPT; - return(structVal); + GenTreePtr structAddr = impGetStructAddr(structVal, structHnd, curLevel, !forceNormalization); // get the addr of struct + GenTreePtr structLdobj = new (this, GT_LDOBJ) GenTreeLdObj(structType, structAddr, structHnd); + + if (structAddr->gtOper == GT_ADDR) + { + // structVal can start off as a GT_RET_EXPR that + // gets changed into a GT_LCL_VAR by impGetStructAddr + // when it calls impAssignTempGen() + structVal = structAddr->gtOp.gtOp1; + } + if (structVal->IsLocal()) + { + // A LDOBJ on a ADDR(LCL_VAR) can never raise an exception + // so we don't set GTF_EXCEPT here. + // + // TODO-CQ: Clear the GTF_GLOB_REF flag on structLdobj as well + // but this needs additional work when inlining. + } + else + { + // In general a LDOBJ is an IND and could raise an exception + structLdobj->gtFlags |= GTF_EXCEPT; + } + return(structLdobj); } @@ -1834,12 +2023,11 @@ bool Compiler::impSpillStackEntry(unsigned level, } } - /* get the original type of the tree (it may be wacked by impAssignTempGen) */ - var_types type = genActualType(tree->gtType); - /* Assign the spilled entry to the temp */ impAssignTempGen(tnum, tree, verCurrentState.esStack[level].seTypeInfo.GetClassHandle(), level); + // The tree type may be modified by impAssignTempGen, so use the type of the lclVar. + var_types type = genActualType(lvaTable[tnum].TypeGet()); GenTreePtr temp = gtNewLclvNode(tnum, type); verCurrentState.esStack[level].val = temp; @@ -2171,11 +2359,13 @@ GenTreePtr Compiler::impCloneExpr(GenTreePtr tree, unsigned temp = lvaGrabTemp(true DEBUGARG(reason)); - // impAssignTempGen() bashes tree->gtType to TYP_VOID for calls which - // return TYP_STRUCT. So cache it - var_types type = genActualType(tree->TypeGet()); + // impAssignTempGen() may change tree->gtType to TYP_VOID for calls which + // return a struct type. It also may modify the struct type to a more + // specialized type (e.g. a SIMD type). So we will get the type from + // the lclVar AFTER calling impAssignTempGen(). impAssignTempGen(temp, tree, structHnd, curLevel, pAfterStmt, impCurStmtOffs); + var_types type = genActualType(lvaTable[temp].TypeGet()); *pClone = gtNewLclvNode(temp, type); return gtNewLclvNode(temp, type); @@ -2791,13 +2981,13 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, if (opts.compDbgCode || opts.MinOpts()) { *pIntrinsicID = CORINFO_INTRINSIC_Illegal; - return NULL; + return nullptr; } *pIntrinsicID = intrinsicID; if (intrinsicID < 0 || CORINFO_INTRINSIC_Count <= intrinsicID) - return NULL; + return nullptr; // Currently we don't have CORINFO_INTRINSIC_Exp because it does not // seem to work properly for Infinity values, we don't do @@ -2810,104 +3000,107 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, switch (intrinsicID) { GenTreePtr op1, op2; - -#if defined(_TARGET_AMD64_) - // Amd64 has no SSE2 instruction to directly compute sin/cos/round. - // Sqrt/Abs are the only FP intrinsics supported by Jit64. 
- // Any additional FP intrinsics we wish to add should be based on - // target framework version so as not to break back compat with Jit64 - // due to precision differences. - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: -#elif defined(_TARGET_ARM64_) - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: - case CORINFO_INTRINSIC_Round: - NYI_ARM64("impIntrinsic"); // Decide which Intrinsics we will support for Arm64 -#elif defined(_TARGET_ARM_) - // there is no small instruction seq for sin and cos on ARM - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: - case CORINFO_INTRINSIC_Round: -#elif defined(_TARGET_X86_) - case CORINFO_INTRINSIC_Sin: - case CORINFO_INTRINSIC_Cos: - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: - case CORINFO_INTRINSIC_Round: -#else -#error Unsupported or unset target architecture -#endif //_TARGET_AMD64_ - + + case CORINFO_INTRINSIC_Sin: + case CORINFO_INTRINSIC_Sqrt: + case CORINFO_INTRINSIC_Abs: + case CORINFO_INTRINSIC_Cos: + case CORINFO_INTRINSIC_Round: + case CORINFO_INTRINSIC_Cosh: + case CORINFO_INTRINSIC_Sinh: + case CORINFO_INTRINSIC_Tan: + case CORINFO_INTRINSIC_Tanh: + case CORINFO_INTRINSIC_Asin: + case CORINFO_INTRINSIC_Acos: + case CORINFO_INTRINSIC_Atan: + case CORINFO_INTRINSIC_Atan2: + case CORINFO_INTRINSIC_Log10: + case CORINFO_INTRINSIC_Pow: + case CORINFO_INTRINSIC_Exp: + case CORINFO_INTRINSIC_Ceiling: + case CORINFO_INTRINSIC_Floor: + + // These are math intrinsics + assert(callType != TYP_STRUCT); - switch (sig->numArgs) + op1 = nullptr; + +#ifdef LEGACY_BACKEND + if (IsTargetIntrinsic(intrinsicID)) +#else + // Intrinsics that are not implemented directly by target intructions will + // be rematerialized as users calls in rationalizer. +#endif { - case 0: - // It seems that all the math intrinsics listed take a single argument, so this - // case will never currently be taken. If this changes, and we get zero-arg intrinsics - // (can't actually imagine one -- it has to return a constant, right?), we would remove - // the assertion, uncomment the code below, and add "GT_MATH" to list of unary operators - // with optional arguments (in the GenTreeOp constructor for zero "op" arguments.) - assert(false); - // op1 = new (this, GT_MATH) GenTreeOp(GT_MATH, genActualType(callType)); - // Instead, for now: - op1 = NULL; - break; - - case 1: - op1 = impPopStack().val; + switch (sig->numArgs) + { + case 0: + // It seems that all the math intrinsics listed take a single argument, so this + // case will never currently be taken. 
+ assert(false); + op1 = nullptr; + break; + + case 1: + op1 = impPopStack().val; #if FEATURE_X87_DOUBLES - // X87 stack doesn't differentiate between float/double - // so it doesn't need a cast, but everybody else does - // Just double check it is at least a FP type - noway_assert(varTypeIsFloating(op1)); + // X87 stack doesn't differentiate between float/double + // so it doesn't need a cast, but everybody else does + // Just double check it is at least a FP type + noway_assert(varTypeIsFloating(op1)); #else // FEATURE_X87_DOUBLES - if (op1->TypeGet() != callType) - op1 = gtNewCastNode(callType, op1, callType); + if (op1->TypeGet() != callType) + op1 = gtNewCastNode(callType, op1, callType); #endif // FEATURE_X87_DOUBLES - op1 = new (this, GT_MATH) GenTreeMath(genActualType(callType), op1, intrinsicID - ); - break; - - case 2: - op2 = impPopStack().val; - op1 = impPopStack().val; + op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, method); + break; + + case 2: + op2 = impPopStack().val; + op1 = impPopStack().val; #if FEATURE_X87_DOUBLES - // X87 stack doesn't differentiate between float/double - // so it doesn't need a cast, but everybody else does - // Just double check it is at least a FP type - noway_assert(varTypeIsFloating(op2)); - noway_assert(varTypeIsFloating(op1)); + // X87 stack doesn't differentiate between float/double + // so it doesn't need a cast, but everybody else does + // Just double check it is at least a FP type + noway_assert(varTypeIsFloating(op2)); + noway_assert(varTypeIsFloating(op1)); #else // FEATURE_X87_DOUBLES - if (op2->TypeGet() != callType) - op2 = gtNewCastNode(callType, op2, callType); - if (op1->TypeGet() != callType) - op1 = gtNewCastNode(callType, op1, callType); + if (op2->TypeGet() != callType) + op2 = gtNewCastNode(callType, op2, callType); + if (op1->TypeGet() != callType) + op1 = gtNewCastNode(callType, op1, callType); #endif // FEATURE_X87_DOUBLES - op1 = new (this, GT_MATH) GenTreeMath(genActualType(callType), op1, op2, intrinsicID - ); - break; - - default: - assert(!"Unsupport number of args for Math Instrinsic"); - return NULL; - } + op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, op2, intrinsicID, method); + break; + default: + assert(!"Unsupport number of args for Math Instrinsic"); + return nullptr; + } + } + +#ifndef LEGACY_BACKEND + if (IsIntrinsicImplementedByUserCall(intrinsicID)) + { + op1->gtFlags |= GTF_CALL; + } +#endif return op1; + #ifndef _TARGET_ARM_ // TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic case CORINFO_INTRINSIC_InterlockedAdd32: @@ -3042,16 +3235,27 @@ InterlockedBinOpCommon: // Get native TypeHandle argument to old helper op1 = op1->gtCall.gtCallArgs; assert(op1->IsList()); - assert(op1->gtOp.gtOp2 == NULL); + assert(op1->gtOp.gtOp2 == nullptr); op1 = op1->gtOp.gtOp1; return op1; } // Call the regular function. return NULL; +#ifndef LEGACY_BACKEND + case CORINFO_INTRINSIC_Object_GetType: + + op1 = impPopStack().val; + op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, method); + + // Set the CALL flag to indicate that the operator is implemented by a call. 
+ op1->gtFlags |= GTF_CALL; + return op1; +#endif + default: /* Unknown intrinsic */ - return NULL; + return nullptr; } } @@ -4450,7 +4654,8 @@ void Compiler::impImportAndPushBox (CORINFO_RESOLVED_TOKEN * pResolved op1 = gtNewLclvNode(impBoxTemp, TYP_REF); op2 = gtNewIconNode(sizeof(void*), TYP_I_IMPL); op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1, op2); - if (exprToBox->TypeGet() == TYP_STRUCT) + + if (varTypeIsStruct(exprToBox)) { assert(info.compCompHnd->getClassSize(pResolvedToken->hClass) == info.compCompHnd->getClassSize(operCls)); op1 = impAssignStructPtr(op1, exprToBox, operCls,(unsigned)CHECK_SPILL_ALL); @@ -4550,7 +4755,7 @@ GenTreePtr Compiler::impTransformThis (GenTreePtr thisPtr, GenTreePtr obj = thisPtr; assert(obj->TypeGet() == TYP_BYREF || obj->TypeGet() == TYP_I_IMPL); - obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass); + obj = gtNewLdObjNode(pConstrainedResolvedToken->hClass, obj); obj->gtFlags |= GTF_EXCEPT; CorInfoType jitTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass); @@ -5054,10 +5259,10 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN * pResolv FieldSeqNode* firstElemFldSeq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField); op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1, new(this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, sizeof(void*), firstElemFldSeq)); - if (lclTyp == TYP_STRUCT) + if (varTypeIsStruct(lclTyp)) { // Constructor adds GTF_GLOB_REF. Note that this is *not* GTF_EXCEPT. - op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, pFieldInfo->structType); + op1 = gtNewLdObjNode(pFieldInfo->structType, op1); } else { @@ -5251,8 +5456,8 @@ bool Compiler::impTailCallRetTypeCompatible(var_types callerRetTy // trust code can make those tail calls. unsigned callerRetTypeSize = 0; unsigned calleeRetTypeSize = 0; - bool isCallerRetTypMBEnreg = VarTypeIsMultiByteAndCanEnreg(callerRetType, callerRetTypeClass, &callerRetTypeSize); - bool isCalleeRetTypMBEnreg = VarTypeIsMultiByteAndCanEnreg(calleeRetType, calleeRetTypeClass, &calleeRetTypeSize); + bool isCallerRetTypMBEnreg = VarTypeIsMultiByteAndCanEnreg(callerRetType, callerRetTypeClass, &callerRetTypeSize, true); + bool isCalleeRetTypMBEnreg = VarTypeIsMultiByteAndCanEnreg(calleeRetType, calleeRetTypeClass, &calleeRetTypeSize, true); if (varTypeIsIntegral(callerRetType) || isCallerRetTypMBEnreg) { @@ -5285,6 +5490,7 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed, OPCODE curOpcode, const BYTE *codeAddrOfNextOpcode, const BYTE *codeEnd, + bool isRecursive, bool *isCallPopAndRet /* = nullptr */) { // Bail out if the current opcode is not a call. @@ -5293,11 +5499,16 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed, return false; } -#if FEATURE_TAILCALL_OPT_SHARED_RETURN - // we can actually handle if the ret is in a fallthrough block, as long as that is the only part of the sequence. - // Make sure we don't go past the end of the IL however. - codeEnd = min(codeEnd + 1, info.compCode+info.compILCodeSize); +#if !FEATURE_TAILCALL_OPT_SHARED_RETURN + // If shared ret tail opt is not enabled, we will enable + // it for recursive methods. + if (isRecursive) #endif + { + // we can actually handle if the ret is in a fallthrough block, as long as that is the only part of the sequence. + // Make sure we don't go past the end of the IL however. 
+ codeEnd = min(codeEnd + 1, info.compCode + info.compILCodeSize); + } // Bail out if there is no next opcode after call if (codeAddrOfNextOpcode >= codeEnd) @@ -5363,7 +5574,8 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed, bool Compiler::impIsImplicitTailCallCandidate(OPCODE opcode, const BYTE *codeAddrOfNextOpcode, const BYTE *codeEnd, - int prefixFlags) + int prefixFlags, + bool isRecursive) { #if FEATURE_TAILCALL_OPT @@ -5379,12 +5591,14 @@ bool Compiler::impIsImplicitTailCallCandidate(OPCODE opcode, #if !FEATURE_TAILCALL_OPT_SHARED_RETURN // the block containing call is marked as BBJ_RETURN - if (compCurBB->bbJumpKind != BBJ_RETURN) + // We allow shared ret tail call optimization on recursive calls even under + // !FEATURE_TAILCALL_OPT_SHARED_RETURN. + if (!isRecursive && (compCurBB->bbJumpKind != BBJ_RETURN)) return false; #endif // !FEATURE_TAILCALL_OPT_SHARED_RETURN // must be call+ret or call+pop+ret - if (!impIsTailCallILPattern(false, opcode, codeAddrOfNextOpcode,codeEnd)) + if (!impIsTailCallILPattern(false, opcode, codeAddrOfNextOpcode, codeEnd, isRecursive)) return false; return true; @@ -5671,13 +5885,10 @@ var_types Compiler::impImportCall (OPCODE opcode, // Recursive call is treaded as a loop to the begining of the method. if (methHnd == info.compMethodHnd) { - /* TODO-CQ: - * Actually transform the function into a loop - */ #ifdef DEBUG if (verbose) { - printf("\nFound recursive call in the method. Mark BB%02u to BB%02u as having a backward branch.\n", + JITDUMP("\nFound recursive call in the method. Mark BB%02u to BB%02u as having a backward branch.\n", fgFirstBB->bbNum, compCurBB->bbNum); } #endif @@ -5848,9 +6059,8 @@ var_types Compiler::impImportCall (OPCODE opcode, // We remove the nullcheck for the GetType call instrinsic. // TODO-CQ: JIT64 does not introduce the null check for many more helper calls // and instrinsics. 
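impIsTailCallILPattern, extended above with an isRecursive parameter, accepts a call as an implicit tail call only when the IL that follows it is ret, or pop followed by ret; the isRecursive case additionally relaxes the shared-return restriction so a self-recursive tail call can later become a loop. A stripped-down scanner over an array of placeholder opcodes shows the shape of that check; real IL encodings, prefix handling, stress modes, and the fall-through-to-return case are all omitted:

#include <cassert>

// Placeholder opcode values for the sketch; these are not real IL encodings.
enum : unsigned char { OP_NOP, OP_CALL, OP_POP, OP_RET };

// Is the IL right after a call either "ret" or "pop ; ret"?
static bool IsTailCallShape(const unsigned char* nextOp, const unsigned char* codeEnd)
{
    if (nextOp >= codeEnd)
        return false;                 // no opcode after the call
    if (*nextOp == OP_POP)            // a call whose result is discarded
    {
        ++nextOp;
        if (nextOp >= codeEnd)
            return false;
    }
    return *nextOp == OP_RET;
}

int main()
{
    const unsigned char callRet[]    = { OP_CALL, OP_RET };
    const unsigned char callPopRet[] = { OP_CALL, OP_POP, OP_RET };
    const unsigned char callNopRet[] = { OP_CALL, OP_NOP, OP_RET };
    assert( IsTailCallShape(callRet    + 1, callRet    + sizeof(callRet)));
    assert( IsTailCallShape(callPopRet + 1, callPopRet + sizeof(callPopRet)));
    assert(!IsTailCallShape(callNopRet + 1, callNopRet + sizeof(callNopRet)));
    return 0;
}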
- if (callInfo->nullInstanceCheck && - !((mflags & CORINFO_FLG_INTRINSIC) != 0 && - intrinsicID == CORINFO_INTRINSIC_Object_GetType)) + if (callInfo->nullInstanceCheck && + !((mflags & CORINFO_FLG_INTRINSIC) != 0 && (intrinsicID == CORINFO_INTRINSIC_Object_GetType))) { call->gtFlags |= GTF_CALL_NULLCHECK; } @@ -5919,12 +6129,12 @@ var_types Compiler::impImportCall (OPCODE opcode, if (mflags & CORINFO_FLG_NOGCCHECK) call->gtCall.gtCallMoreFlags |= GTF_CALL_M_NOGCCHECK; - + // Mark call if it's one of the ones we will maybe treat as an intrinsic if (intrinsicID == CORINFO_INTRINSIC_Object_GetType || intrinsicID == CORINFO_INTRINSIC_TypeEQ || intrinsicID == CORINFO_INTRINSIC_TypeNEQ || - intrinsicID == CORINFO_INTRINSIC_GetCurrentManagedThread || + intrinsicID == CORINFO_INTRINSIC_GetCurrentManagedThread || intrinsicID == CORINFO_INTRINSIC_GetManagedThreadId) { call->gtCall.gtCallMoreFlags |= GTF_CALL_M_SPECIAL_INTRINSIC; @@ -5969,7 +6179,15 @@ var_types Compiler::impImportCall (OPCODE opcode, } } - // Check for varargs + CORINFO_CLASS_HANDLE actualMethodRetTypeSigClass; + actualMethodRetTypeSigClass = sig->retTypeSigClass; + if (varTypeIsStruct(callRetTyp)) + { + callRetTyp = impNormStructType(actualMethodRetTypeSigClass); + call->gtType = callRetTyp; + } + + /* Check for varargs */ #if !FEATURE_VARARG if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG || (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG) @@ -6010,8 +6228,6 @@ var_types Compiler::impImportCall (OPCODE opcode, #ifdef DEBUG unsigned numArgsDef = sig->numArgs; #endif - CORINFO_CLASS_HANDLE actualMethodRetTypeSigClass = sig->retTypeSigClass; - eeGetCallSiteSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, sig); #ifdef DEBUG @@ -6502,10 +6718,6 @@ DONE: // Must be an implicit tail call. assert((tailCall & PREFIX_TAILCALL_IMPLICIT) != 0); - // implicit tail call i.e. opportunistic tail call. - // We don't mark it as canTailCall since it could be an in-line - // candidate. - // // It is possible that a call node is both an inline candidate and marked // for opportunistic tail calling. In-lining happens before morhphing of // trees. If in-lining of an in-line candidate gets aborted for whatever @@ -6528,26 +6740,26 @@ DONE: #endif //FEATURE_TAILCALL_OPT } - // we can't report success just yet... + // we can't report success just yet... } else { canTailCall = false; - // canTailCall reported it's reasons already + // canTailCall reported its reasons already #ifdef DEBUG if (verbose) { - printf("\ninfo.compCompHnd->canTailCall return false for call "); + printf("\ninfo.compCompHnd->canTailCall returned false for call "); printTreeID(call); printf("\n"); } -#endif +#endif } } else { - // If this assert fires it means to set canTailCall to false, without setting a reason! - assert(szCanTailCallFailReason != NULL); + // If this assert fires it means that canTailCall was set to false without setting a reason! 
+ assert(szCanTailCallFailReason != nullptr); #ifdef DEBUG if (verbose) @@ -6656,7 +6868,7 @@ DONE_CALL: } // Sometimes "call" is not a GT_CALL (if we imported an intrinsic that didn't turn into a call) - if ((callRetTyp == TYP_STRUCT) && (call->gtOper == GT_CALL)) + if (varTypeIsStruct(callRetTyp) && (call->gtOper == GT_CALL)) { call = impFixupStructReturn(call, sig->retTypeClass); } @@ -6720,14 +6932,14 @@ ABORT_THIS_INLINE_ONLY: #pragma warning(pop) #endif -bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO * methInfo) +bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO * methInfo) { if (methInfo->args.retType != CORINFO_TYPE_VALUECLASS && methInfo->args.retType != CORINFO_TYPE_REFANY) { return false; } -#if defined(_TARGET_AMD64_) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) assert(!info.compIsVarArgs && "Varargs not supported in CoreCLR on Unix."); if (IsRegisterPassable(methInfo->args.retTypeClass)) { @@ -6737,30 +6949,49 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO * // The struct is not aligned properly or it is bigger than 16 bytes, // or it is custom layout, or it is not passed in registers for any other reason. return true; -#elif defined(_TARGET_X86_) || defined(_TARGET_AMD64_) - // Check for TYP_STRUCT argument that can fit into a single register. - // We don't need a return buffer if: +#elif defined(_TARGET_XARCH_) + // On Amd64 and x86 only we don't need a return buffer if: + // // i) TYP_STRUCT argument that can fit into a single register and // ii) Power of two sized TYP_STRUCT. + // unsigned size = info.compCompHnd->getClassSize(methInfo->args.retTypeClass); - return (size > TARGET_POINTER_SIZE) || ((size & (size - 1)) != 0); -#elif defined(_TARGET_ARM_) - // Check for non HFA: in ARM HFAs are returned in registers. + if ((size <= TARGET_POINTER_SIZE) && isPow2(size)) + { + return false; + } +#else + // Generally we don't need a return buffer if: + // i) TYP_STRUCT argument that can fit into a single register + // The power of two size requirement only applies for Amd64 and x86. + // + + unsigned size = info.compCompHnd->getClassSize(methInfo->args.retTypeClass); + if (size <= TARGET_POINTER_SIZE) + { + return false; + } +#endif + +#if FEATURE_MULTIREG_STRUCT_RET + + // Support for any additional cases that don't use a Return Buffer Argument + // on targets that support multi-reg return valuetypes. + // + #ifdef _TARGET_ARM_ + // On ARM HFAs are returned in registers. if (!info.compIsVarArgs && IsHfa(methInfo->args.retTypeClass)) { return false; } - // Check for TYP_STRUCT argument that can fit into a single register. - return (info.compCompHnd->getClassSize(methInfo->args.retTypeClass) > TARGET_POINTER_SIZE); -#elif defined(_TARGET_ARM64_) - // TODO-ARM64-NYI: HFA/HVA arguments. - // Check for TYP_STRUCT argument that is greater than 16 bytes. 
- return info.compCompHnd->getClassSize(methInfo->args.retTypeClass) > 16; -#else // _TARGET_* -#error Unsupported or unset target architecture -#endif // _TARGET_* -} + #endif + +#endif + // Otherwise we require that a RetBuffArg be used + return true; + +} #ifdef DEBUG // @@ -6829,7 +7060,9 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd) { assert(call->gtOper == GT_CALL); - if (call->TypeGet() != TYP_STRUCT) + + + if (!varTypeIsStruct(call)) { return call; } @@ -6862,7 +7095,7 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call, return call; } - return impAssignStructToVar(call, retClsHnd); + return impAssignStructClassToVar(call, retClsHnd); } #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs. @@ -6888,83 +7121,30 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call, // If we can tail call returning in registers struct or inline a method that returns // a registers returned struct, then don't assign it to // a variable back and forth. - return impAssignStructToVar(call, retClsHnd); + return impAssignStructClassToVar(call, retClsHnd); } } } else { call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG; - } + } return call; #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - unsigned size = info.compCompHnd->getClassSize(retClsHnd); + unsigned size = info.compCompHnd->getClassSize(retClsHnd); BYTE gcPtr = 0; // Check for TYP_STRUCT argument that can fit into a single register // change the type on those trees. - // TODO-ARM64-NYI: what about structs 9 to 16 bytes that fit in two consecutive registers? - switch (size) + var_types regType = argOrReturnTypeForStruct(retClsHnd, true); + if (regType != TYP_UNKNOWN) + { + call->gtCall.gtReturnType = regType; + } + else { - case 1: - call->gtCall.gtReturnType = TYP_BYTE; - break; - - case 2: - call->gtCall.gtReturnType = TYP_SHORT; - break; - -#ifdef _TARGET_X86_ - case 4: -#endif - -#ifdef _TARGET_ARM64_ - case 3: - case 4: - call->gtCall.gtReturnType = TYP_INT; - break; -#endif - -#ifdef _TARGET_ARM_ - case 3: - case 4: -#endif // _TARGET_ARM_ - -#ifdef _TARGET_ARM64_ - case 5: - case 6: - case 7: - case 8: -#endif // _TARGET_ARM64_ - -#ifdef _TARGET_AMD64_ - case 4: - call->gtCall.gtReturnType = TYP_INT; - break; - - case 8: -#endif // _TARGET_AMD64_ - - // case POINTER_SIZED - info.compCompHnd->getClassGClayout(retClsHnd, &gcPtr); - if (gcPtr == TYPE_GC_NONE) - { - call->gtCall.gtReturnType = TYP_I_IMPL; - } - else if (gcPtr == TYPE_GC_REF) - { - call->gtCall.gtReturnType = TYP_REF; - } - else if (gcPtr == TYPE_GC_BYREF) - { - call->gtCall.gtReturnType = TYP_BYREF; - } - break; - - default: call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG; - break; } return call; @@ -6978,7 +7158,7 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call, GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd) { - assert(info.compRetType == TYP_STRUCT); + assert(varTypeIsStruct(info.compRetType)); assert(info.compRetBuffArg == BAD_VAR_NUM); #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) @@ -7008,7 +7188,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL return op; } - return impAssignStructToVar(op, retClsHnd); + return impAssignStructClassToVar(op, retClsHnd); } } #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING @@ -7018,11 +7198,12 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL { if (op->gtOper == GT_LCL_VAR) { +#if 
FEATURE_MULTIREG_STRUCT_RET // This LCL_VAR is an HFA return value, it stays as a TYP_STRUCT unsigned lclNum = op->gtLclVarCommon.gtLclNum; // Make sure this struct type stays as struct so that we can return it as an HFA lvaTable[lclNum].lvDontPromote = true; - +#endif // FEATURE_MULTIREG_STRUCT_RET return op; } @@ -7040,7 +7221,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL return op; } } - return impAssignStructToVar(op, retClsHnd); + return impAssignStructClassToVar(op, retClsHnd); } #endif @@ -7114,7 +7295,7 @@ REDO_RETURN_NODE: else #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING { - assert(info.compRetNativeType == op->gtCall.gtReturnType); + assert(info.compRetNativeType == op->gtCall.gtReturnType); } #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING #endif // DEBUG @@ -7137,7 +7318,7 @@ REDO_RETURN_NODE: else #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { - op->gtType = info.compRetNativeType; + op->gtType = info.compRetNativeType; } return op; @@ -8927,6 +9108,14 @@ _PopValue: tiRetVal = se.seTypeInfo; } +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(lclTyp) && (lclTyp != op1->TypeGet())) + { + assert(op1->TypeGet() == TYP_STRUCT); + op1->gtType = lclTyp; + } +#endif // FEATURE_SIMD + op1 = impImplicitIorI4Cast(op1, lclTyp); #ifdef _TARGET_64BIT_ @@ -9020,7 +9209,7 @@ _PopValue: } #endif // !FEATURE_X87_DOUBLES - if (lclTyp == TYP_STRUCT) + if (varTypeIsStruct(lclTyp)) { op1 = impAssignStruct(op2, op1, clsHnd, (unsigned)CHECK_SPILL_ALL); } @@ -9526,6 +9715,7 @@ ARR_LD_POST_VERIFY: op2->gtOper == GT_ADD) { block->bbFlags |= BBF_HAS_INDX; + optMethodFlags |= OMF_HAS_ARRAYREF; } } @@ -9568,8 +9758,7 @@ ARR_LD_POST_VERIFY: if (ldstruct) { // Do a LDOBJ on the result - op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, ldelemClsHnd - ); + op1 = gtNewLdObjNode(ldelemClsHnd, op1); op1->gtFlags |= GTF_EXCEPT; } impPushOnStack(op1, tiRetVal); @@ -9737,6 +9926,18 @@ ARR_LD_POST_VERIFY: op3 = impCheckForNullPointer(op3); + // Mark the block as containing an index expression + + if (op3->gtOper == GT_LCL_VAR) + { + if (op1->gtOper == GT_LCL_VAR || + op1->gtOper == GT_ADD) + { + block->bbFlags |= BBF_HAS_INDX; + optMethodFlags |= OMF_HAS_ARRAYREF; + } + } + /* Create the index node */ op1 = gtNewIndexRef(lclTyp, op3, op1); @@ -9749,7 +9950,9 @@ ARR_LD_POST_VERIFY: op1->gtIndex.gtStructElemClass = stelemClsHnd; op1->gtIndex.gtIndElemSize = info.compCompHnd->getClassSize(stelemClsHnd); - + } + if (varTypeIsStruct(op1)) + { // wrap it in a & op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, op1); op1 = impAssignStructPtr(op1, op2, stelemClsHnd, (unsigned)CHECK_SPILL_ALL); @@ -10562,7 +10765,7 @@ _CONV: // Since we are throwing away the value, just normalize // it to its address. This is more efficient. - if (op1->TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(op1)) { op1 = impGetStructAddr(op1, clsHnd, (unsigned)CHECK_SPILL_ALL, false); } @@ -11197,35 +11400,23 @@ DO_LDFTN: /* Remember that this basic block contains 'new' of an object */ block->bbFlags |= BBF_HAS_NEWOBJ; + optMethodFlags |= OMF_HAS_NEWOBJ; } else { // This is the normal case where the size of the object is // fixed. Allocate the memory and call the constructor. - // See if this is a valueclass constructor that is immediately followed by a - // stobj to a local. In that case we do not have to allocate a temporary - // local, we can just store directly to it. 
+ // Note: We cannot add a peep to avoid use of temp here + // becase we don't have enough interference info to detect when + // sources and destination interfere, example: s = new S(ref); - OPCODE peekNext = (OPCODE)getU1LittleEndian(codeAddr + sz); - lclNum = -1; - bool needTemp = true; - - if (peekNext == CEE_STOBJ && clsFlags & CORINFO_FLG_VALUECLASS) - { - GenTree* dst = impStackTop(callInfo.sig.numArgs).val; - if (dst->gtOper == GT_ADDR && dst->gtOp.gtOp1->gtOper == GT_LCL_VAR) - { - lclNum = dst->gtOp.gtOp1->AsLclVarCommon()->gtLclNum; - needTemp = false; - } - } + // TODO: We find the correct place to introduce a general + // reverse copy prop for struct return values from newobj or + // any function returning structs. /* get a temporary for the new object */ - if (needTemp) - { - lclNum = lvaGrabTemp(true DEBUGARG("NewObj constructor temp")); - } + lclNum = lvaGrabTemp(true DEBUGARG("NewObj constructor temp")); // In the value class case we only need clsHnd for size calcs. // @@ -11246,23 +11437,20 @@ DO_LDFTN: lvaSetStruct(lclNum, resolvedToken.hClass, true /* unsafe value cls check */); } - if (needTemp) - { - // Append a tree to zero-out the temp - newObjThisPtr = gtNewOperNode(GT_ADDR, TYP_BYREF, - gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet())); - - newObjThisPtr = gtNewBlkOpNode(GT_INITBLK, - newObjThisPtr, // Dest - gtNewIconNode(0), // Value - gtNewIconNode(info.compCompHnd->getClassSize(resolvedToken.hClass)), // Size - false); // volatil - impAppendTree(newObjThisPtr, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs); - } + // Append a tree to zero-out the temp + newObjThisPtr = gtNewOperNode(GT_ADDR, TYP_BYREF, + gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet())); + + newObjThisPtr = gtNewBlkOpNode(GT_INITBLK, + newObjThisPtr, // Dest + gtNewIconNode(0), // Value + gtNewIconNode(info.compCompHnd->getClassSize(resolvedToken.hClass)), // Size + false); // volatil + impAppendTree(newObjThisPtr, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs); // Obtain the address of the temp newObjThisPtr = gtNewOperNode(GT_ADDR, TYP_BYREF, - gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet())); + gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet())); } else { @@ -11285,6 +11473,7 @@ DO_LDFTN: /* Remember that this basic block contains 'new' of an object */ block->bbFlags |= BBF_HAS_NEWOBJ; + optMethodFlags |= OMF_HAS_NEWOBJ; /* Append the assignment to the temp/local. Dont need to spill at all as we are just calling an EE-Jit helper which can only @@ -11396,10 +11585,12 @@ DO_LDFTN: // Note that when running under tail call stress, a call will be marked as explicit tail prefixed // hence will not be considered for implicit tail calling. 
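// Editor's note: a plain C++ illustration (not JIT code) of the interference
// problem cited in the new comment earlier in this hunk: if the result of a
// value-class newobj were constructed directly into the destination local
// instead of a fresh temp, the constructor could overwrite a field it still
// needs to read.
struct S
{
    int a;
    int b;
    explicit S(const int& r) : a(r + 1), b(r + 10) {}   // 'r' may alias the destination
};

int InterferenceDemo()
{
    S s(0);       // s.a == 1, s.b == 10
    s = S(s.a);   // correct: the temporary is fully built from the old s.a (1), giving b == 11;
                  // constructing in place would first write a = 2, then read the clobbered
                  // field when computing b, giving b == 12
    return s.b;   // 11
}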
+ bool isRecursive = (callInfo.hMethod == info.compMethodHnd); if (impIsImplicitTailCallCandidate(opcode, codeAddr + sz, codeEndp, - prefixFlags)) + prefixFlags, + isRecursive)) { JITDUMP(" (Implicit Tail call: prefixFlags |= PREFIX_TAILCALL_IMPLICIT)"); prefixFlags |= PREFIX_TAILCALL_IMPLICIT; @@ -11689,6 +11880,12 @@ DO_LDFTN: if (obj->gtType == TYP_BYREF) op1->gtFlags |= GTF_IND_TGTANYWHERE; + DWORD typeFlags = info.compCompHnd->getClassAttribs(resolvedToken.hClass); + if (StructHasOverlappingFields(typeFlags)) + { + op1->gtField.gtFldMayOverlap = true; + } + // wrap it in a address of operator if necessary if (isLoadAddress) { @@ -11939,6 +12136,11 @@ FIELD_DONE: /* Create the data member node */ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, obj, fieldInfo.offset); + DWORD typeFlags = info.compCompHnd->getClassAttribs(resolvedToken.hClass); + if (StructHasOverlappingFields(typeFlags)) + { + op1->gtField.gtFldMayOverlap = true; + } #ifdef FEATURE_READYTORUN_COMPILER if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE) @@ -12000,7 +12202,7 @@ FIELD_DONE: } /* Create the member assignment, unless we have a struct */ - bool deferStructAssign = (lclTyp == TYP_STRUCT); + bool deferStructAssign = varTypeIsStruct(lclTyp); if (!deferStructAssign) { @@ -12181,6 +12383,8 @@ FIELD_DONE: /* Remember that this basic block contains 'new' of an sd array */ block->bbFlags |= BBF_HAS_NEWARRAY; + optMethodFlags |= OMF_HAS_NEWARRAY; + /* Push the result of the call on the stack */ impPushOnStack(op1, tiRetVal); @@ -12459,8 +12663,8 @@ FIELD_DONE: // inline the common case of the unbox helper // UNBOX(exp) morphs into // clone = pop(exp); - // ((*clone != typeToken) ? helper(clone, typeToken) : nop); - // push(clone + 4) + // ((*clone == typeToken) ? nop : helper(clone, typeToken)); + // push(clone + sizeof(void*)) // GenTreePtr cloneOperand; op1 = impCloneExpr(op1, &cloneOperand, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, NULL DEBUGARG("inline UNBOX clone1") ); @@ -12479,9 +12683,17 @@ FIELD_DONE: ); op1 = gtNewQmarkNode(TYP_VOID, condBox, op1); condBox->gtFlags |= GTF_RELOP_QMARK; - - impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs); - + + // QMARK nodes cannot reside on the evaluation stack. Because there + // may be other trees on the evaluation stack that side-effect the + // sources of the UNBOX operation we must spill the stack. + + impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs); + + // Create the address-expression to reference past the object header + // to the beginning of the value-type. Today this means adjusting + // past the base of the objects vtable field which is pointer sized. + op2 = gtNewIconNode(sizeof(void*), TYP_I_IMPL); op1 = gtNewOperNode(GT_ADD, TYP_BYREF, cloneOperand, op2); } @@ -12503,7 +12715,7 @@ FIELD_DONE: } assert(helper == CORINFO_HELP_UNBOX && op1->gtType == TYP_BYREF || // Unbox helper returns a byref. - helper == CORINFO_HELP_UNBOX_NULLABLE && op1->gtType == TYP_STRUCT // UnboxNullable helper returns a struct. + helper == CORINFO_HELP_UNBOX_NULLABLE && varTypeIsStruct(op1) // UnboxNullable helper returns a struct. ); /* @@ -12530,7 +12742,7 @@ FIELD_DONE: { if (helper == CORINFO_HELP_UNBOX_NULLABLE) { - // Unbox nullable helper returns a TYP_STRUCT. + // Unbox nullable helper returns a struct type. // We need to spill it to a temp so than can take the address of it. // Here we need unsafe value cls check, since the address of struct is taken to be used // further along and potetially be exploitable. 
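// Editor's note: a sketch of the shape of the inlined UNBOX fast path built
// above, written as ordinary C++ over a hypothetical object layout (the real
// code builds a GT_QMARK tree, and the helper's result is unused here -- it
// exists to perform the full type check and throw on mismatch).
struct MethodTable;
struct Object { MethodTable* methodTable; };   // the type handle is the first pointer-sized field

void* UnboxFastPath(Object* boxed, MethodTable* expected,
                    void (*unboxHelper)(Object*, MethodTable*))
{
    // ((*clone == typeToken) ? nop : helper(clone, typeToken))
    if (boxed->methodTable != expected)
    {
        unboxHelper(boxed, expected);          // slow path: full check, throws on failure
    }
    // push(clone + sizeof(void*)): the value starts just past the method-table pointer
    return reinterpret_cast<char*>(boxed) + sizeof(void*);
}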
@@ -12560,7 +12772,7 @@ FIELD_DONE: impPushOnStack(op1, tiRetVal); oper = GT_LDOBJ; goto LDOBJ; - } + } assert(helper == CORINFO_HELP_UNBOX_NULLABLE && "Make sure the helper is nullable!"); #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) @@ -12595,13 +12807,13 @@ FIELD_DONE: impPushOnStack(op1, tiRetVal); // Load the struct. - oper = GT_LDOBJ; + oper = GT_LDOBJ; assert(op1->gtType == TYP_BYREF); assert(!tiVerificationNeeded || tiRetVal.IsByRef()); - goto LDOBJ; - } + goto LDOBJ; + } else { // If non register passable struct we have it materialized in the RetBuf. @@ -12610,11 +12822,11 @@ FIELD_DONE: assert(tiRetVal.IsValueClass()); } } - + #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) assert(op1->gtType == TYP_STRUCT); tiRetVal = verMakeTypeInfo(resolvedToken.hClass); - assert(tiRetVal.IsValueClass()); + assert(tiRetVal.IsValueClass()); #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } @@ -13016,7 +13228,7 @@ INITBLK_OR_INITOBJ: op2 = impPopStack().val; // Value op1 = impPopStack().val; // Ptr - assertImp(op2->TypeGet() == TYP_STRUCT); + assertImp(varTypeIsStruct(op2)); op1 = impAssignStructPtr(op1, op2, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL); goto SPILL_APPEND; @@ -13121,24 +13333,22 @@ LDOBJ: assertImp(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL); - // LDOBJ returns a struct - // and an inline argument which is the class token of the loaded obj - op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass); - op1->gtFlags |= GTF_EXCEPT; - CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass); if (impIsPrimitive(jitTyp)) { - // GT_IND is a large node, but its OK if GTF_IND_RNGCHK is not set - op1->ChangeOperUnchecked(GT_IND); + op1 = gtNewOperNode(GT_IND, JITtype2varType(jitTyp), op1); - // ldobj could point anywhere, example a boxed class static int - op1->gtFlags |= GTF_IND_TGTANYWHERE; - - op1->gtType = JITtype2varType(jitTyp); - op1->gtOp.gtOp2 = 0; // must be zero for tree walkers + // Could point anywhere, example a boxed class static int + op1->gtFlags |= GTF_IND_TGTANYWHERE|GTF_GLOB_REF; assertImp(varTypeIsArithmetic(op1->gtType)); } + else + { + // LDOBJ returns a struct + // and an inline argument which is the class token of the loaded obj + op1 = gtNewLdObjNode(resolvedToken.hClass, op1); + } + op1->gtFlags |= GTF_EXCEPT; impPushOnStack(op1, tiRetVal); break; @@ -13447,14 +13657,10 @@ void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORIN } #endif -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) -GenTreePtr Compiler::impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass) +#if FEATURE_MULTIREG_STRUCT_RET +GenTreePtr Compiler::impAssignStructClassToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass) { -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for register returned structs in System V")); -#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM")); -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg structs.")); impAssignTempGen(tmpNum, op, hClass, (unsigned) CHECK_SPILL_NONE); GenTreePtr ret = gtNewLclvNode(tmpNum, TYP_STRUCT); #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) @@ -13472,7 +13678,7 @@ GenTreePtr Compiler::impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hC #endif // 
defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) return ret; } -#endif +#endif // FEATURE_MULTIREG_STRUCT_RET // do import for a return // returns false if inlining was aborted @@ -13493,7 +13699,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & Verify(!verIsByRefLike(tiDeclared) || verIsSafeToReturnByRef(tiVal) , "byref return"); - + Verify(tiCompatibleWith(tiVal, tiDeclared.NormaliseForStack(), true), "type mismatch"); expectedStack=1; } @@ -13552,9 +13758,16 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & } #endif - /* Make sure the type matches the original call */ + // Make sure the type matches the original call. + + var_types returnType = genActualType(op2->gtType); + var_types originalCallType = impInlineInfo->inlineCandidateInfo->fncRetType; + if ((returnType != originalCallType) && (originalCallType == TYP_STRUCT)) + { + originalCallType = impNormStructType(impInlineInfo->inlineCandidateInfo->methInfo.args.retTypeClass); + } - if (genActualType(op2->gtType) != impInlineInfo->inlineCandidateInfo->fncRetType) + if (returnType != originalCallType) { JITLOG((LL_INFO1000000, INLINER_FAILED "Return types are not matching in %s called by %s\n", impInlineInfo->InlinerCompiler->info.compFullName, info.compFullName)); @@ -13581,7 +13794,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & // we don't unconditionally overwrite it, it shouldn't matter. if (info.compRetNativeType != TYP_STRUCT) { - if (info.compRetType == TYP_STRUCT) + if (varTypeIsStruct(info.compRetType)) { noway_assert(info.compRetBuffArg == BAD_VAR_NUM); // adjust the type away from struct to integral @@ -13679,7 +13892,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & impInlineInfo->retExpr = op2; } else - { + { GenTreePtr iciCall = impInlineInfo->iciCall; assert(iciCall->gtOper == GT_CALL); @@ -13691,11 +13904,11 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & // and the retexpr is just the temp. assert(info.compRetNativeType != TYP_VOID); assert(fgMoreThanOneReturnBlock()); - - impAssignTempGen(lvaInlineeReturnSpillTemp, - op2, - se.seTypeInfo.GetClassHandle(), - (unsigned) CHECK_SPILL_ALL); + + impAssignTempGen(lvaInlineeReturnSpillTemp, + op2, + se.seTypeInfo.GetClassHandle(), + (unsigned)CHECK_SPILL_ALL); } // TODO-ARM64-NYI: HFA // TODO-AMD64-Unix and TODO-ARM once the ARM64 functionality is implemented the @@ -13721,7 +13934,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & if (!impInlineInfo->retExpr) { #if defined(_TARGET_ARM_) - impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, TYP_STRUCT); + impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, info.compRetType); #else // !defined(_TARGET_ARM_) // The inlinee compiler has figured out the type of the temp already. Use it here. 
impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType); @@ -13734,6 +13947,22 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & } } else +#elif defined(_TARGET_ARM64_) + if ((iciCall->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) == 0) + { + if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM) + { + if (!impInlineInfo->retExpr) + { + impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, TYP_STRUCT); + } + } + else + { + impInlineInfo->retExpr = op2; + } + } + else #endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { assert(iciCall->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG); @@ -13746,7 +13975,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & { impInlineInfo->retExpr = impAssignStructPtr( dest, - gtNewLclvNode(lvaInlineeReturnSpillTemp, TYP_STRUCT), + gtNewLclvNode(lvaInlineeReturnSpillTemp, info.compRetType), retClsHnd, (unsigned) CHECK_SPILL_ALL); } @@ -13788,7 +14017,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & op1 = new (this, GT_RETURN) GenTreeOp(GT_RETURN, TYP_VOID); } } - else if (info.compRetType == TYP_STRUCT) + else if (varTypeIsStruct(info.compRetType)) { #if !defined(_TARGET_ARM_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // In ARM HFA native types are maintained as structs. @@ -14460,6 +14689,7 @@ SPILLSTACK: { unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt JTRUE ref Op1")); impAssignTempGen(temp, relOp->gtOp.gtOp1, level); + type = genActualType(lvaTable[temp].TypeGet()); relOp->gtOp.gtOp1 = gtNewLclvNode(temp, type); } @@ -14467,6 +14697,7 @@ SPILLSTACK: { unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt JTRUE ref Op2")); impAssignTempGen(temp, relOp->gtOp.gtOp2, level); + type = genActualType(lvaTable[temp].TypeGet()); relOp->gtOp.gtOp2 = gtNewLclvNode(temp, type); } } @@ -15424,11 +15655,6 @@ JitInlineResult Compiler::impCanInlineNative(int callsiteNativeEstimate, if ((info.compClassAttr & CORINFO_FLG_VALUECLASS) != 0) { lvaStructPromotionInfo structPromotionInfo; - - structPromotionInfo.typeHnd = 0; - structPromotionInfo.canPromote = false; - structPromotionInfo.requiresScratchVar = false; - lvaCanPromoteStructType(info.compClassHnd, &structPromotionInfo, false); if (structPromotionInfo.canPromote) { @@ -15954,7 +16180,8 @@ JitInlineResult Compiler::impCheckCanInline(GenTreePtr call, assert( (genActualType(fncRealRetType) == genActualType(fncRetType)) || // <BUGNUM> VSW 288602 </BUGNUM> // In case of IJW, we allow to assign a native pointer to a BYREF. 
- (fncRetType == TYP_BYREF && methInfo.args.retType == CORINFO_TYPE_PTR) + (fncRetType == TYP_BYREF && methInfo.args.retType == CORINFO_TYPE_PTR) || + (varTypeIsStruct(fncRetType) && (fncRealRetType == TYP_STRUCT)) ); #endif @@ -16014,7 +16241,7 @@ JitInlineResult Compiler::impInlineRecordArgInfo(InlineInfo * pInlineInfo, GenTreePtr lclVarTree; if (impIsAddressInLocal(curArgVal, &lclVarTree) && - (lclVarTree->TypeGet() == TYP_STRUCT)) + varTypeIsStruct(lclVarTree)) { inlCurArgInfo->argIsByRefToStructLocal = true; #ifdef FEATURE_SIMD @@ -16210,15 +16437,24 @@ JitInlineResult Compiler::impInlineInitVars(InlineInfo * pInlineInfo) sigType = TYP_REF; lclVarInfo[0].lclVerTypeInfo = verMakeTypeInfo(pInlineInfo->inlineCandidateInfo->clsHandle); - lclVarInfo[0].lclTypeInfo = sigType; lclVarInfo[0].lclHasLdlocaOp = false; #ifdef FEATURE_SIMD - if (!foundSIMDType && isSIMDClass(&(lclVarInfo[0].lclVerTypeInfo))) + // We always want to check isSIMDClass, since we want to set foundSIMDType (to increase + // the inlining multiplier) for anything in that assembly. + // But we only need to normalize it if it is a TYP_STRUCT + // (which we need to do even if we have already set foundSIMDType). + if ((!foundSIMDType || (sigType == TYP_STRUCT)) && + isSIMDClass(&(lclVarInfo[0].lclVerTypeInfo))) { + if (sigType == TYP_STRUCT) + { + sigType = impNormStructType(lclVarInfo[0].lclVerTypeInfo.GetClassHandle()); + } foundSIMDType = true; } #endif // FEATURE_SIMD + lclVarInfo[0].lclTypeInfo = sigType; assert(varTypeIsGC(thisArg->gtType) || // "this" is managed (thisArg->gtType == TYP_I_IMPL && // "this" is unmgd but the method's class doesnt care @@ -16280,16 +16516,25 @@ JitInlineResult Compiler::impInlineInitVars(InlineInfo * pInlineInfo) var_types sigType = (var_types) eeGetArgType(argLst, &methInfo->args); lclVarInfo[i].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->args, argLst); - lclVarInfo[i].lclTypeInfo = sigType; - lclVarInfo[i].lclHasLdlocaOp = false; - #ifdef FEATURE_SIMD - if (!foundSIMDType && isSIMDClass(&(lclVarInfo[i].lclVerTypeInfo))) + if ((!foundSIMDType || (sigType == TYP_STRUCT)) && + isSIMDClass(&(lclVarInfo[i].lclVerTypeInfo))) { + // If this is a SIMD class (i.e. in the SIMD assembly), then we will consider that we've + // found a SIMD type, even if this may not be a type we recognize (the assumption is that + // it is likely to use a SIMD type, and therefore we want to increase the inlining multiplier). foundSIMDType = true; + if (sigType == TYP_STRUCT) + { + var_types structType = impNormStructType(lclVarInfo[i].lclVerTypeInfo.GetClassHandle()); + sigType = structType; + } } #endif // FEATURE_SIMD + lclVarInfo[i].lclTypeInfo = sigType; + lclVarInfo[i].lclHasLdlocaOp = false; + /* Does the tree type match the signature type? 
*/ GenTreePtr inlArgNode = inlArgInfo[i].argNode; @@ -16413,9 +16658,15 @@ JitInlineResult Compiler::impInlineInitVars(InlineInfo * pInlineInfo) localsSig = info.compCompHnd->getArgNext(localsSig); #ifdef FEATURE_SIMD - if (!foundSIMDType && isSIMDClass(&(lclVarInfo[i + argCnt].lclVerTypeInfo))) + if ((!foundSIMDType || (type == TYP_STRUCT)) && + isSIMDClass(&(lclVarInfo[i + argCnt].lclVerTypeInfo))) { foundSIMDType = true; + if (featureSIMD && type == TYP_STRUCT) + { + var_types structType = impNormStructType(lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle()); + lclVarInfo[i + argCnt].lclTypeInfo = structType; + } } #endif // FEATURE_SIMD } @@ -16462,7 +16713,7 @@ unsigned Compiler::impInlineFetchLocal(unsigned lclNum if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.IsStruct()) { - if (lclTyp == TYP_STRUCT) + if (varTypeIsStruct(lclTyp)) { lvaSetStruct(tmpNum, impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.GetClassHandle(), true /* unsafe value cls check */); } @@ -16581,7 +16832,7 @@ GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo *inlArgInfo, if (lclVarInfo[lclNum].lclVerTypeInfo.IsStruct()) { - if (lclTyp == TYP_STRUCT) + if (varTypeIsStruct(lclTyp)) { lvaSetStruct(tmpNum, impInlineInfo->lclVarInfo[lclNum].lclVerTypeInfo.GetClassHandle(), true /* unsafe value cls check */); } @@ -16705,9 +16956,9 @@ BOOL Compiler::impInlineIsGuaranteedThisDerefBeforeAnySideEffects // Check the inlining eligibility of this GT_CALL node. // Mark GTF_CALL_INLINE_CANDIDATE on the GT_CALL node -void Compiler::impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_HANDLE exactContextHnd) +void Compiler::impMarkInlineCandidate(GenTreePtr callNode, CORINFO_CONTEXT_HANDLE exactContextHnd) { - assert(call->gtOper == GT_CALL); + GenTreeCall* call = callNode->AsCall(); const char * inlineFailReason = NULL; JitInlineResult result; @@ -16739,13 +16990,22 @@ void Compiler::impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_ return; } - // In-lining candidate determination need to honor only IL tail prefix. - // In-lining takes precedence over implicit tail call optimization. - if (call->gtCall.IsTailPrefixedCall()) + // Inlining candidate determination needs to honor only IL tail prefix. + // Inlining takes precedence over implicit tail call optimization (if the call is not directly recursive). + if (call->IsTailPrefixedCall()) { inlineFailReason = "Call site marked as tailcall."; goto InlineFailed; } + // Tail recursion elimination takes precedence over inlining. + // TODO: We may want to do some of the additional checks from fgMorphCall + // here to reduce the chance we don't inline a call that won't be optimized + // as a fast tail call or turned into a loop. 
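// Editor's note: the new inlining gate that follows, restated as a standalone
// predicate (illustrative names): a directly recursive call that is an
// implicit tail call is left for the tail-recursion-to-loop transformation
// rather than being marked as an inline candidate.
struct CallSiteInfo
{
    const void* calleeMethodHandle;
    bool        isImplicitTailCall;
};

static bool SkipInlineForTailRecursion(const CallSiteInfo& call, const void* currentMethodHandle)
{
    const bool isRecursive = (call.calleeMethodHandle == currentMethodHandle);
    return isRecursive && call.isImplicitTailCall;
}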
+ if (gtIsRecursiveCall(call) && call->IsImplicitTailCall()) + { + inlineFailReason = "Recursive tail call"; goto InlineFailed; + } + if ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT) { inlineFailReason = "Not a direct call."; goto InlineFailed; @@ -16753,13 +17013,13 @@ void Compiler::impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_ /* Ignore helper calls */ - if (call->gtCall.gtCallType == CT_HELPER) + if (call->gtCallType == CT_HELPER) { inlineFailReason = "Inlinee is a helper call."; goto InlineFailed; } /* Ignore indirect calls */ - if (call->gtCall.gtCallType == CT_INDIRECT) + if (call->gtCallType == CT_INDIRECT) { inlineFailReason = "Not a direct managed call."; goto InlineFailed; } @@ -16768,10 +17028,10 @@ void Compiler::impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_ * restricts the inliner to non-expanding inlines. I removed the check to allow for non-expanding * inlining in throw blocks. I should consider the same thing for catch and filter regions. */ - - CORINFO_METHOD_HANDLE fncHandle; - fncHandle = call->gtCall.gtCallMethHnd; unsigned methAttr; + CORINFO_METHOD_HANDLE fncHandle; + + fncHandle = call->gtCallMethHnd; methAttr = info.compCompHnd->getMethodAttribs(fncHandle); #ifdef DEBUG @@ -16781,6 +17041,12 @@ void Compiler::impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_ } #endif + // Check for COMPLUS_AgressiveInlining + if (compDoAggressiveInlining) + { + methAttr |= CORINFO_FLG_FORCEINLINE; + } + if (!(methAttr & CORINFO_FLG_FORCEINLINE)) { /* Don't bother inline blocks that are in the filter region */ @@ -16859,9 +17125,9 @@ void Compiler::impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_ } // The old value should be NULL - assert(call->gtCall.gtInlineCandidateInfo == NULL); + assert(call->gtInlineCandidateInfo == nullptr); - call->gtCall.gtInlineCandidateInfo = inlineCandidateInfo; + call->gtInlineCandidateInfo = inlineCandidateInfo; // Mark the call node as inline candidate. call->gtFlags |= GTF_CALL_INLINE_CANDIDATE; @@ -16877,10 +17143,115 @@ InlineFailed: JITDUMP("\nInliningFailed: %s\n", inlineFailReason); JitInlineResult inlineResult(INLINE_FAIL, info.compMethodHnd, //I think that this test is for a virtual call. - (call->gtCall.gtCallType == CT_USER_FUNC) ? call->gtCall.gtCallMethHnd: NULL, + (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd: nullptr, inlineFailReason); inlineResult.report(info.compCompHnd); return; } - + +/******************************************************************************/ +// Returns true if the given intrinsic will be implemented by target-specific +// instructions + +bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId) +{ +#if defined(_TARGET_AMD64_) + switch (intrinsicId) + { + // Amd64 only has SSE2 instruction to directly compute sqrt/abs. 
+ case CORINFO_INTRINSIC_Sqrt: + case CORINFO_INTRINSIC_Abs: + return true; + + default: + return false; + } +#elif defined(_TARGET_ARM64_) + switch (intrinsicId) + { + case CORINFO_INTRINSIC_Sqrt: + case CORINFO_INTRINSIC_Abs: + case CORINFO_INTRINSIC_Round: + return true; + + default: + return false; + } +#elif defined(_TARGET_ARM_) + switch (intrinsicId) + { + case CORINFO_INTRINSIC_Sqrt: + case CORINFO_INTRINSIC_Abs: + case CORINFO_INTRINSIC_Round: + return true; + + default: + return false; + } +#elif defined(_TARGET_X86_) + switch (intrinsicId) + { + case CORINFO_INTRINSIC_Sin: + case CORINFO_INTRINSIC_Cos: + case CORINFO_INTRINSIC_Sqrt: + case CORINFO_INTRINSIC_Abs: + case CORINFO_INTRINSIC_Round: + return true; + + default: + return false; + } +#else + // TODO: This portion of logic is not implemented for other arch. + // The reason for returning true is that on all other arch the only intrinsic + // enabled are target intrinsics. + return true; +#endif //_TARGET_AMD64_ +} + +/******************************************************************************/ +// Returns true if the given intrinsic will be implemented by calling System.Math +// methods. + +bool Compiler::IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId) +{ + // Currently, if an math intrisic is not implemented by target-specific + // intructions, it will be implemented by a System.Math call. In the + // future, if we turn to implementing some of them with helper callers, + // this predicate needs to be revisited. + return !IsTargetIntrinsic(intrinsicId); +} + +bool Compiler::IsMathIntrinsic(CorInfoIntrinsics intrinsicId) +{ + switch (intrinsicId) + { + case CORINFO_INTRINSIC_Sin: + case CORINFO_INTRINSIC_Sqrt: + case CORINFO_INTRINSIC_Abs: + case CORINFO_INTRINSIC_Cos: + case CORINFO_INTRINSIC_Round: + case CORINFO_INTRINSIC_Cosh: + case CORINFO_INTRINSIC_Sinh: + case CORINFO_INTRINSIC_Tan: + case CORINFO_INTRINSIC_Tanh: + case CORINFO_INTRINSIC_Asin: + case CORINFO_INTRINSIC_Acos: + case CORINFO_INTRINSIC_Atan: + case CORINFO_INTRINSIC_Atan2: + case CORINFO_INTRINSIC_Log10: + case CORINFO_INTRINSIC_Pow: + case CORINFO_INTRINSIC_Exp: + case CORINFO_INTRINSIC_Ceiling: + case CORINFO_INTRINSIC_Floor: + return true; + default: + return false; + } +} + +bool Compiler::IsMathIntrinsic(GenTreePtr tree) +{ + return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->gtIntrinsic.gtIntrinsicId); +} /*****************************************************************************/ diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp index c63952ae12..f1132b6abc 100644 --- a/src/jit/instr.cpp +++ b/src/jit/instr.cpp @@ -2285,6 +2285,7 @@ void CodeGen::inst_RV_TT(instruction ins, #if CPU_LOAD_STORE_ARCH if (ins == INS_mov) { +#if defined (_TARGET_ARM_) if (tree->TypeGet() != TYP_LONG) { ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL)!=0); @@ -2297,6 +2298,11 @@ void CodeGen::inst_RV_TT(instruction ins, { ins = ins_Move_Extend(TYP_INT, (tree->gtFlags & GTF_REG_VAL)!=0 && genRegPairHi(tree->gtRegPair) != REG_STK); } +#elif defined(_TARGET_ARM64_) + ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL)!=0); +#else + NYI("CodeGen::inst_RV_TT with INS_mov"); +#endif } #endif // CPU_LOAD_STORE_ARCH @@ -3132,7 +3138,7 @@ bool CodeGenInterface::validImmForBL (ssize_t addr) // This matches the usual behavior for NGEN, since we normally do generate "BL". 
(!compiler->info.compMatchedVM && (compiler->opts.eeFlags & CORJIT_FLG_PREJIT)) || - (compiler->info.compCompHnd->getRelocTypeHint((void*)addr) == IMAGE_REL_BASED_THUMB_BRANCH24); + (compiler->eeGetRelocTypeHint((void*)addr) == IMAGE_REL_BASED_THUMB_BRANCH24); } bool CodeGen::arm_Valid_Imm_For_BL (ssize_t addr) { @@ -3192,15 +3198,13 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) if (varTypeIsFloating(srcType)) { - InstructionSet iset = compiler->getFloatingPointInstructionSet(); - if (srcType == TYP_DOUBLE) { - return INS_movsdsse2; + return (srcInReg) ? INS_movaps : INS_movsdsse2; } else if (srcType == TYP_FLOAT) { - return INS_movss; + return (srcInReg) ? INS_movaps : INS_movss; } else { @@ -3324,6 +3328,13 @@ instruction CodeGenInterface::ins_Load(var_types srcType, if (varTypeIsSIMD(srcType)) { #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) +#ifdef FEATURE_SIMD + if (srcType == TYP_SIMD8) + { + return INS_movsdsse2; + } + else +#endif // FEATURE_SIMD if (compiler->canUseAVX()) { // TODO-CQ: consider alignment of AVX vectors. @@ -3475,6 +3486,13 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligne #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) if (varTypeIsSIMD(dstType)) { +#ifdef FEATURE_SIMD + if (dstType == TYP_SIMD8) + { + return INS_movsdsse2; + } + else +#endif // FEATURE_SIMD if (compiler->canUseAVX()) { // TODO-CQ: consider alignment of AVX vectors. @@ -3896,7 +3914,7 @@ void CodeGen::instGen_MemoryBarrier() #elif defined (_TARGET_ARM_) getEmitter()->emitIns_I(INS_dmb, EA_4BYTE, 0xf); #elif defined (_TARGET_ARM64_) - getEmitter()->emitIns_BARR(INS_dmb, INS_BARRIER_ST); + getEmitter()->emitIns_BARR(INS_dmb, INS_BARRIER_SY); #else #error "Unknown _TARGET_" #endif diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 9f8a9956a3..47245e9d51 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -154,23 +154,23 @@ INSTMUL(imul_15, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003868, BAD_CODE) // So a 4-byte opcode would be something like this: // 0x22114433 -#define B3(byte1,byte2,byte3) ((byte1 << 16) | (byte2 << 24) | byte3) -#define B2(byte1,byte2) ((byte1 << 16) | byte2) -#define SSEFLT(c) B3(0xf3, 0x0f, c) -#define SSEDBL(c) B3(0xf2, 0x0f, c) -#define PCKDBL(c) B3(0x66, 0x0f, c) -#define PCKFLT(c) B2(0x0f,c) +#define PACK3(byte1,byte2,byte3) ((byte1 << 16) | (byte2 << 24) | byte3) +#define PACK2(byte1,byte2) ((byte1 << 16) | byte2) +#define SSEFLT(c) PACK3(0xf3, 0x0f, c) +#define SSEDBL(c) PACK3(0xf2, 0x0f, c) +#define PCKDBL(c) PACK3(0x66, 0x0f, c) +#define PCKFLT(c) PACK2(0x0f,c) // These macros encode extra byte that is implicit in the macro. 
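// Editor's note: a standalone check of the byte layout the renamed PACK*
// macros produce (same shifts as PACK3 above): the second prefix byte lands in
// the top byte, the first prefix below it, and the opcode byte in the low
// byte -- consistent with the "0x22114433" example in the surrounding comment
// for the 4-byte form.
#include <cstdint>

constexpr std::uint32_t Pack3(std::uint32_t b1, std::uint32_t b2, std::uint32_t b3)
{
    return (b1 << 16) | (b2 << 24) | b3;
}

static_assert(Pack3(0x11, 0x22, 0x33) == 0x22110033u, "prefix/opcode byte layout");
static_assert(Pack3(0x66, 0x0f, 0x10) == 0x0f660010u, "PCKDBL-style packing of opcode 0x10");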
-#define B4(byte1,byte2,byte3,byte4) ((byte1 << 16) | (byte2 << 24) | byte3 | (byte4 << 8)) -#define SSE38(c) B4(0x66, 0x0f, 0x38, c) -#define SSE3A(c) B4(0x66, 0x0f, 0x3A, c) +#define PACK4(byte1,byte2,byte3,byte4) ((byte1 << 16) | (byte2 << 24) | byte3 | (byte4 << 8)) +#define SSE38(c) PACK4(0x66, 0x0f, 0x38, c) +#define SSE3A(c) PACK4(0x66, 0x0f, 0x3A, c) // VEX* encodes the implied leading opcode bytes in c1: // 1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a -#define VEX2INT(c1,c2) B3(c1, 0xc5, c2) -#define VEX3INT(c1,c2) B4(c1, 0xc5, 0x02, c2) -#define VEX3FLT(c1,c2) B4(c1, 0xc5, 0x02, c2) +#define VEX2INT(c1,c2) PACK3(c1, 0xc5, c2) +#define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2) +#define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2) // Please insert any SSE2 instructions between FIRST_SSE2_INSTRUCTION and LAST_SSE2_INSTRUCTION INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE) @@ -427,7 +427,6 @@ INST1(fcomi , "fcomi" , 1, IUM_RD, 0, 1, 0x00F0DB) INST1(fcomip , "fcomip" , 1, IUM_RD, 0, 1, 0x00F0DF) INST1(fchs , "fchs" , 1, IUM_RW, 0, 1, 0x00E0D9) -#if INLINE_MATH INST1(fabs , "fabs" , 1, IUM_RW, 0, 1, 0x00E1D9) INST1(fsin , "fsin" , 1, IUM_RW, 0, 1, 0x00FED9) INST1(fcos , "fcos" , 1, IUM_RW, 0, 1, 0x00FFD9) @@ -436,7 +435,6 @@ INST1(fldl2e , "fldl2e" , 1, IUM_RW, 0, 1, 0x00EAD9) INST1(frndint, "frndint" , 1, IUM_RW, 0, 1, 0x00FCD9) INST1(f2xm1 , "f2xm1" , 1, IUM_RW, 0, 1, 0x00F0D9) INST1(fscale , "fscale" , 1, IUM_RW, 0, 1, 0x00FDD9) -#endif INST1(fld1 , "fld1" , 1, IUM_WR, 0, 0, 0x00E8D9) INST1(fldz , "fldz" , 1, IUM_WR, 0, 0, 0x00EED9) diff --git a/src/jit/jit.h b/src/jit/jit.h index 2901ffd6eb..5f32ab2287 100644 --- a/src/jit/jit.h +++ b/src/jit/jit.h @@ -391,7 +391,7 @@ typedef ptrdiff_t ssize_t; #define INLINE_NDIRECT INLINE_PINVOKE // ndirect is an archaic name for pinvoke #define LONG_MATH_REGPARAM 0 // args to long mul/div passed in registers #define STACK_PROBES 0 // Support for stack probes -#define XML_FLOWGRAPHS DEBUG // Support for creating Xml Flowgraph reports in *.fgx files +#define DUMP_FLOWGRAPHS DEBUG // Support for creating Xml Flowgraph reports in *.fgx files #define HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION 1 // if 1 we must have all handler entry points in the Hot code section @@ -461,19 +461,21 @@ const bool dspGCtbls = true; #ifdef DEBUG void JitDump(const char* pcFormat, ...); -#define DISPNODE(t) if (GetTlsCompiler()->verbose) GetTlsCompiler()->gtDispTree(t, nullptr, nullptr, true); - -#define JITDUMP(...) JitDump(__VA_ARGS__) +#define JITDUMP(...) { if (GetTlsCompiler()->verbose) JitDump(__VA_ARGS__); } #define JITLOG(x) { JitLogEE x; } #define JITLOG_THIS(t, x) { (t)->JitLogEE x; } #define DBEXEC(flg, expr) if (flg) {expr;} +#define DISPNODE(t) if (GetTlsCompiler()->verbose) GetTlsCompiler()->gtDispTree(t, nullptr, nullptr, true); +#define DISPTREE(x) if (GetTlsCompiler()->verbose) GetTlsCompiler()->gtDispTree(x) +#define VERBOSE GetTlsCompiler()->verbose #else // !DEBUG #define JITDUMP(...) 
#define JITLOG(x) #define JITLOG_THIS(t, x) +#define DBEXEC(flg, expr) +#define DISPNODE(t) #define DISPTREE(x) #define VERBOSE 0 -#define DBEXEC(flg, expr) #endif // !DEBUG /***************************************************************************** @@ -710,18 +712,6 @@ private: #define FEATURE_TAILCALL_OPT_SHARED_RETURN 0 #endif // !FEATURE_TAILCALL_OPT -/*****************************************************************************/ - -#ifndef INLINE_MATH -#if CPU_HAS_FP_SUPPORT -#define INLINE_MATH 1 // enable inline math intrinsics -#else -#define INLINE_MATH 0 // disable inline math intrinsics -#endif -#endif - -/*****************************************************************************/ - #define CLFLG_CODESIZE 0x00001 #define CLFLG_CODESPEED 0x00002 #define CLFLG_CSE 0x00004 @@ -817,7 +807,6 @@ void SetTlsCompiler(Compiler* c); #include "compiler.h" -void JitDump(const char* pcFormat, ...); template<typename T> T dspPtr(T p) { @@ -830,10 +819,8 @@ T dspOffset(T o) return (o == 0) ? 0 : (GetTlsCompiler()->opts.dspDiffable ? T(0xD1FFAB1E) : o); } -#define DISPNODE(t) if (GetTlsCompiler()->verbose) GetTlsCompiler()->gtDispTree(t, nullptr, nullptr, true); -#define DISPTREE(x) if (GetTlsCompiler()->verbose) GetTlsCompiler()->gtDispTree(x) -#define VERBOSE GetTlsCompiler()->verbose -#else // defined(DEBUG) +#else // !defined(DEBUG) + template<typename T> T dspPtr(T p) { @@ -845,8 +832,8 @@ T dspOffset(T o) { return o; } -#define DISPNODE(t) -#endif // defined(DEBUG) + +#endif // !defined(DEBUG) /*****************************************************************************/ #endif //_JIT_H_ diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets index f5c073d777..dae789470a 100644 --- a/src/jit/jit.settings.targets +++ b/src/jit/jit.settings.targets @@ -5,10 +5,9 @@ <UserIncludes> $(UserIncludes); - $(VCToolsIncPath); ..; ..\jitstd\; - ..\ssa\; + $(Clrbase)\src\TraceLog; </UserIncludes> <!-- PCH baloney --> @@ -16,14 +15,27 @@ <PCHCompile>..\jitpch.cpp</PCHCompile> <PCHHeader>jitpch.h</PCHHeader> - <ClDefines Condition="'$(DebugBuild)' == 'false'">$(ClDefines);FAST=1</ClDefines> + <!-- JIT_BUILD disables certain PAL_TRY debugging features --> <ClDefines>$(ClDefines);JIT_BUILD=1</ClDefines> + + <ClDefines Condition="'$(DebugBuild)' == 'false'">$(ClDefines);FAST=1</ClDefines> <ClDefines Condition="'$(DebugBuild)' == 'true'">$(ClDefines);DEBUG=1</ClDefines> </PropertyGroup> + <!-- For debugging purposes only, temporarily enable these in RET builds so GenTree debugging is easier. --> + <!-- We need to link with /OPT:NOICF or our magic vtable debugging system for GenTree doesn't work. --> + <PropertyGroup Condition="'$(DebugBuild)' == 'true'"> + <!-- This is already automatically defined in DEBUG builds. 
+ <ClDefines>$(ClDefines);DEBUGGABLE_GENTREE=1</ClDefines> + --> + <LinkEnableCOMDATFolding>false</LinkEnableCOMDATFolding> <!-- /OPT:NOICF --> + <ClAdditionalOptions>$(ClAdditionalOptions) /Ob0</ClAdditionalOptions> <!-- no inlining --> + </PropertyGroup> + <!-- Leaf Project Items --> <ItemGroup> <CppCompile Include="..\alloc.cpp" /> + <CppCompile Include="..\earlyprop.cpp" /> <CppCompile Include="..\bitset.cpp" /> <CppCompile Include="..\block.cpp" /> <CppCompile Include="..\Compiler.cpp" /> @@ -41,6 +53,7 @@ <CppCompile Include="..\hashbv.cpp" /> <CppCompile Include="..\Importer.cpp" /> <CppCompile Include="..\Instr.cpp" /> + <CppCompile Include="..\JitTelemetry.cpp" /> <CppCompile Include="..\LclVars.cpp" /> <CppCompile Include="..\Liveness.cpp" /> <CppCompile Include="..\Morph.cpp" /> @@ -107,9 +120,6 @@ <CppCompile Include="..\unwindArm.cpp" /> <CppCompile Include="..\unwindArm64.cpp" /> </ItemGroup> - <ItemGroup> - <ClDisableSpecificWarnings Include="4480"/> - </ItemGroup> <!-- Import the targets - this actually contains the full build rules --> <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.targets" /> diff --git a/src/jit/jiteh.cpp b/src/jit/jiteh.cpp index 94555b92a3..6f7f2ce8a8 100644 --- a/src/jit/jiteh.cpp +++ b/src/jit/jiteh.cpp @@ -1409,7 +1409,7 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) if (XTnum < compHndBBtabCount) { /* We copy over the old entry */ - memcpy(HBtab, HBtab + 1, (compHndBBtabCount - XTnum) * sizeof(*HBtab)); + memmove(HBtab, HBtab + 1, (compHndBBtabCount - XTnum) * sizeof(*HBtab)); } else { diff --git a/src/jit/jittelemetry.cpp b/src/jit/jittelemetry.cpp new file mode 100644 index 0000000000..aa1ed6d655 --- /dev/null +++ b/src/jit/jittelemetry.cpp @@ -0,0 +1,388 @@ +// ==++== +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// ==--== +/*****************************************************************************/ +// <OWNER>clrjit</OWNER> +// +// This class abstracts the telemetry information collected for the JIT. +// +// Goals: +// 1. Telemetry information should be a NO-op when JIT level telemetry is disabled. +// 2. Data collection should be actionable. +// 3. Data collection should comply to privacy rules. +// 4. Data collection cannot impact JIT/OS performance. +// 5. Data collection volume should be manageable by our remote services. +// +// DESIGN CONCERNS: +// +// > To collect data, we use the TraceLogging API provided by Windows. +// +// The brief workflow suggested is: +// #include <TraceLoggingProvider.h> +// TRACELOGGING_DEFINE_PROVIDER( // defines g_hProvider +// g_hProvider, // Name of the provider variable +// "MyProvider", // Human-readable name of the provider +// (0xb3864c38, 0x4273, 0x58c5, 0x54, 0x5b, 0x8b, 0x36, 0x08, 0x34, 0x34, 0x71)); // Provider GUID +// int main(int argc, char* argv[]) // or DriverEntry for kernel-mode. +// { +// TraceLoggingRegister(g_hProvider, NULL, NULL, NULL); // NULLs only needed for C. Please do not include the NULLs in C++ code. +// TraceLoggingWrite(g_hProvider, +// "MyEvent1", +// TraceLoggingString(argv[0], "arg0"), +// TraceLoggingInt32(argc)); +// TraceLoggingUnregister(g_hProvider); +// return 0; +// } +// +// In summary, this involves: +// 1. Creating a binary/DLL local provider using: +// TRACELOGGING_DEFINE_PROVIDER(g_hProvider, "ProviderName", providerId, [option]) +// 2. Registering the provider instance +// TraceLoggingRegister(g_hProvider) +// 3. Perform TraceLoggingWrite operations to write out data. +// 4. Unregister the provider instance. 
+// TraceLoggingUnregister(g_hProvider) +// +// A. Determining where to create the provider instance? +// 1) We use the same provider name/GUID as the CLR and the CLR creates its own DLL local provider handle. +// For CLRJIT.dll, the question is, can the same provider name/GUIDs be shared across binaries? +// +// Answer: +// "For TraceLogging providers, it is okay to use the same provider GUID / name +// in different binaries. Do not share the same provider handle across DLLs. +// As long as you do not pass an hProvider from one DLL to another, TraceLogging +// will properly keep track of the events." +// +// 2) CoreCLR is linked into the CLR. CLR already creates an instance, so where do we create the JIT's instance? +// Answer: +// "Ideally you would have one provider per DLL, but if you're folding distinct sets +// of functionality into one DLL (like shell32.dll or similar sort of catch-all things) +// you can have perhaps a few more providers per binary." +// +// B. Determining where to register and unregister the provider instance? +// 1) For CLRJIT.dll we can register the provider instance during jitDllOnProcessAttach. +// Since one of our goals is to turn telemetry off, we need to be careful about +// referencing environment variables during the DLL load and unload path. +// Referencing environment variables through ConfigDWORD uses UtilCode. +// This roughly translates to InitUtilcode() being called before jitDllOnProcessAttach. +// +// For CLRJIT.dll, compStartup is called on jitOnDllProcessAttach(). +// This can be called twice through sxsJitStartup -- so prevent double initialization. +// UtilCode is init-ed by this time. The same is true for CoreCLR. +// +// 2) For CLRJIT.dll and CoreCLR, compShutdown will be called during jitOnDllProcessDetach(). +// +// C. Determining the data to collect: +// +// IMPORTANT: Since telemetry data can be collected at any time after DLL load, +// make sure you initialize the compiler state variables you access in telemetry +// data collection. For example, if you are transmitting method names, then +// make sure info.compMethodHnd is initialized at that point. +// +// 1) Tracking noway assert count: +// After a noway assert is hit, in both min-opts and non-min-opts, we collect +// info such as the JIT version, method hash being compiled, filename and +// line number etc. +// +// 2) Tracking baseline for the noway asserts: +// During DLL unload, we report the number of methods that were compiled by +// the JIT per process both under normal mode and during min-opts. NOTE that +// this is ON for all processes. +// +// 3) For the future, be aware of privacy, performance and actionability of the data. +// + +#include "jitpch.h" +#include "compiler.h" + +#ifdef FEATURE_TRACELOGGING +#include "TraceLoggingProvider.h" +#include "MicrosoftTelemetry.h" +#include "clrtraceloggingcommon.h" +#include "fxver.h" + +// Since telemetry code could be called under a noway_assert, make sure, +// we don't call noway_assert again. +#undef noway_assert + +#define BUILD_STR1(x) #x +#define BUILD_STR2(x) BUILD_STR1(x) +#define BUILD_MACHINE BUILD_STR2(__BUILDMACHINE__) + +// A DLL local instance of the DotNet provider +TRACELOGGING_DEFINE_PROVIDER(g_hClrJitProvider, CLRJIT_PROVIDER_NAME, CLRJIT_PROVIDER_ID, TraceLoggingOptionMicrosoftTelemetry()); + +// Threshold to detect if we are hitting too many bad (noway) methods +// over good methods per process to prevent logging too much data. 
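// Editor's note: the constants and check that follow implement the throttling
// rule just described; restated here as a standalone predicate with sample
// values (illustrative names).
static bool SuppressNowayTelemetry(unsigned methodsCompiled, unsigned methodsHitNoway)
{
    const double   noiseRatio           = 0.6;  // stop logging past a 60% failure rate...
    const unsigned sufficiencyThreshold = 25;   // ...but only once enough methods have been seen

    unsigned attempts = methodsCompiled + methodsHitNoway;
    if (attempts == 0)
    {
        attempts = 1;
    }
    double ratio = static_cast<double>(methodsHitNoway) / attempts;
    return (methodsHitNoway > sufficiencyThreshold) && (ratio > noiseRatio);
}
// e.g. SuppressNowayTelemetry(10, 30) is true (30/40 = 0.75, suppressed), while
//      SuppressNowayTelemetry(170, 30) is false (30/200 = 0.15, still logged).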
+static const double NOWAY_NOISE_RATIO = 0.6; // Threshold of (bad / total) beyond which we'd stop logging. We'd restart if the pass rate improves. +static const unsigned NOWAY_SUFFICIENCY_THRESHOLD = 25; // Count of methods beyond which we'd apply percent threshold + +// Initialize Telemetry State +volatile bool JitTelemetry::s_fProviderRegistered = false; +volatile UINT32 JitTelemetry::s_uMethodsCompiled = 0; +volatile UINT32 JitTelemetry::s_uMethodsHitNowayAssert = 0; + +// Constructor for telemetry state per compiler instance +JitTelemetry::JitTelemetry() +{ + Initialize(nullptr); +} + +//------------------------------------------------------------------------ +// Initialize: Initialize the object with the compiler instance +// +// Description: +// Compiler instance may not be fully initialized. If you are +// tracking object data for telemetry, make sure they are initialized +// in the compiler is ready. +// +void JitTelemetry::Initialize(Compiler* c) +{ + comp = c; + m_pszAssemblyName = ""; + m_pszScopeName = ""; + m_pszMethodName = ""; + m_uMethodHash = 0; + m_fMethodInfoCached = false; +} + +//------------------------------------------------------------------------ +// IsTelemetryEnabled: Can we perform JIT telemetry +// +// Return Value: +// Returns "true" if COMPlus_JitTelemetry environment flag is +// non-zero. Else returns "false". +// +// +/* static */ +bool JitTelemetry::IsTelemetryEnabled() +{ + static ConfigDWORD fJitTelemetry; + return fJitTelemetry.val(CLRConfig::EXTERNAL_JitTelemetry) != 0; +} + +//------------------------------------------------------------------------ +// NotifyDllProcessAttach: Notification for DLL load and static initializations +// +// Description: +// Register telemetry provider with the OS. +// +// Note: +// This method can be called twice in NGEN scenario. +// +void JitTelemetry::NotifyDllProcessAttach() +{ + if (!IsTelemetryEnabled()) + { + return; + } + + if (!s_fProviderRegistered) + { + // Register the provider. + TraceLoggingRegister(g_hClrJitProvider); + s_fProviderRegistered = true; + } +} + +//------------------------------------------------------------------------ +// NotifyDllProcessDetach: Notification for DLL unload and teardown +// +// Description: +// Log the methods compiled data if telemetry is enabled and +// Unregister telemetry provider with the OS. +// +void JitTelemetry::NotifyDllProcessDetach() +{ + if (!IsTelemetryEnabled()) + { + return; + } + + assert(s_fProviderRegistered); // volatile read + + // Unregister the provider. + TraceLoggingUnregister(g_hClrJitProvider); +} + +//------------------------------------------------------------------------ +// NotifyEndOfCompilation: Notification for end of current method +// compilation. +// +// Description: +// Increment static volatile counters for the current compiled method. +// This is slightly inaccurate due to lack of synchronization around +// the counters. Inaccuracy is the tradeoff for JITting cost. +// +// Note: +// 1. Must be called post fully successful compilation of the method. +// 2. This serves as an effective baseline as how many methods compiled +// successfully. +void JitTelemetry::NotifyEndOfCompilation() +{ + if (!IsTelemetryEnabled()) + { + return; + } + + s_uMethodsCompiled++; // volatile increment +} + +//------------------------------------------------------------------------ +// NotifyNowayAssert: Notification that a noway handling is under-way. 
+// +// Arguments: +// filename - The JIT source file name's absolute path at the time of +// building the JIT. +// line - The line number where the noway assert was hit. +// +// Description: +// If telemetry is enabled, then obtain data to collect from the +// compiler or the VM and use the tracelogging APIs to write out. +// +void JitTelemetry::NotifyNowayAssert(const char* filename, unsigned line) +{ + if (!IsTelemetryEnabled()) + { + return; + } + + s_uMethodsHitNowayAssert++; + + // Check if our assumption that noways are rare is invalid for this + // process. If so, return early than logging too much data. + unsigned noways = s_uMethodsHitNowayAssert; + unsigned attempts = max(1, s_uMethodsCompiled + noways); + double ratio = (noways / ((double) attempts)); + if (noways > NOWAY_SUFFICIENCY_THRESHOLD && ratio > NOWAY_NOISE_RATIO) + { + return; + } + + assert(comp); + + UINT32 nowayIndex = s_uMethodsHitNowayAssert; + UINT32 codeSize = 0; + INT32 minOpts = -1; + const char* lastPhase = ""; + if (comp != nullptr) + { + codeSize = comp->info.compILCodeSize; + minOpts = comp->opts.IsMinOptsSet() ? comp->opts.MinOpts() : -1; + lastPhase = PhaseNames[comp->previousCompletedPhase]; + } + + CacheCurrentMethodInfo(); + + TraceLoggingWrite(g_hClrJitProvider, + "CLRJIT.NowayAssert", + + TraceLoggingUInt32(codeSize, "IL_CODE_SIZE"), + TraceLoggingInt32(minOpts, "MINOPTS_MODE"), + TraceLoggingString(lastPhase, "PREVIOUS_COMPLETED_PHASE"), + + TraceLoggingString(m_pszAssemblyName, "ASSEMBLY_NAME"), + TraceLoggingString(m_pszMethodName, "METHOD_NAME"), + TraceLoggingString(m_pszScopeName, "METHOD_SCOPE"), + TraceLoggingUInt32(m_uMethodHash, "METHOD_HASH"), + + TraceLoggingString(filename, "FILENAME"), + TraceLoggingUInt32(line, "LINE"), + TraceLoggingUInt32(nowayIndex, "NOWAY_INDEX"), + + TraceLoggingString(TARGET_READABLE_NAME, "ARCH"), + TraceLoggingString(VER_FILEVERSION_STR, "VERSION"), + TraceLoggingString(BUILD_MACHINE, "BUILD"), + TraceLoggingString(VER_COMMENTS_STR, "FLAVOR"), + + TraceLoggingKeyword(MICROSOFT_KEYWORD_TELEMETRY)); +} + +//------------------------------------------------------------------------ +// CacheCurrentMethodInfo: Cache the method/assembly/scope name info. +// +// Description: +// Obtain the method information if not already cached, for the +// method under compilation from the compiler. This includes: +// +// Method name, assembly name, scope name, method hash. +// +void JitTelemetry::CacheCurrentMethodInfo() +{ + if (m_fMethodInfoCached) + { + return; + } + + assert(comp); + if (comp != nullptr) + { + comp->compGetTelemetryDefaults(&m_pszAssemblyName, &m_pszScopeName, &m_pszMethodName, &m_uMethodHash); + assert(m_pszAssemblyName); + assert(m_pszScopeName); + assert(m_pszMethodName); + } + + // Set cached to prevent getting this twice. + m_fMethodInfoCached = true; +} + +//------------------------------------------------------------------------ +// compGetTelemetryDefaults: Obtain information specific to telemetry +// from the JIT-interface. +// +// Arguments: +// assemblyName - Pointer to hold assembly name upon return +// scopeName - Pointer to hold scope name upon return +// methodName - Pointer to hold method name upon return +// methodHash - Pointer to hold method hash upon return +// +// Description: +// Obtains from the JIT EE interface the information for the +// current method under compilation. +// +// Warning: +// The eeGetMethodName call could be expensive for generic +// methods, so call this method only when there is less impact +// to throughput. 
+// +void Compiler::compGetTelemetryDefaults(const char** assemblyName, const char** scopeName, const char** methodName, unsigned* methodHash) +{ + if (info.compMethodHnd != nullptr) + { + __try + { + + // Expensive calls, call infrequently or in exceptional scenarios. + *methodHash = info.compCompHnd->getMethodHash(info.compMethodHnd); + *methodName = eeGetMethodName(info.compMethodHnd, scopeName); + + // SuperPMI needs to implement record/replay of these method calls. + *assemblyName = info.compCompHnd->getAssemblyName( + info.compCompHnd->getModuleAssembly( + info.compCompHnd->getClassModule(info.compClassHnd))); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + } + } + + // If the JIT interface methods init-ed these values to nullptr, + // make sure they are set to empty string. + if (*methodName == nullptr) + { + *methodName = ""; + } + if (*scopeName == nullptr) + { + *scopeName = ""; + } + if (*assemblyName == nullptr) + { + *assemblyName = ""; + } +} + +#endif // FEATURE_TRACELOGGING diff --git a/src/jit/jittelemetry.h b/src/jit/jittelemetry.h new file mode 100644 index 0000000000..020c2be422 --- /dev/null +++ b/src/jit/jittelemetry.h @@ -0,0 +1,81 @@ +// ==++== +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// ==--== +/*****************************************************************************/ +// <OWNER>clrjit</OWNER> +#pragma once + +#ifdef FEATURE_TRACELOGGING + +class Compiler; + +class JitTelemetry +{ +public: + + // Notify DLL load. + static void NotifyDllProcessAttach(); + + // Notify DLL unload. + static void NotifyDllProcessDetach(); + + // Constructor + JitTelemetry(); + + // Initialize with compiler instance + void Initialize(Compiler* comp); + + // Notification of end of compilation of the current method. + void NotifyEndOfCompilation(); + + // Notification of noway_assert. + void NotifyNowayAssert(const char* filename, unsigned line); + + // Is telemetry enabled through COMPLUS_JitTelemetry? + static bool IsTelemetryEnabled(); + +private: + + // Obtain current method information from VM and cache for + // future uses. + void CacheCurrentMethodInfo(); + + // + //-------------------------------------------------------------------------------- + // The below per process counters are updated without synchronization or + // thread-safety to avoid interfering with the JIT throughput. Accuracy + // of these counters will be traded-off for throughput. + // + + // Methods compiled per DLL unload + static volatile UINT32 s_uMethodsCompiled; + + // Methods compiled per DLL unload that hit noway assert (per process) + static volatile UINT32 s_uMethodsHitNowayAssert; + //-------------------------------------------------------------------------------- + + // Has the provider been registered already (per process) + static volatile bool s_fProviderRegistered; + + // Cached value of current method hash. + unsigned m_uMethodHash; + + // Cached value of current assembly name. + const char* m_pszAssemblyName; + + // Cached value of current scope name, i.e., "Program.Foo" in "Program.Foo:Main" + const char* m_pszScopeName; + + // Cached value of current method name, i.e., "Main" in "Program.Foo:Main" + const char* m_pszMethodName; + + // Have we already cached the method/scope/assembly names? + bool m_fMethodInfoCached; + + // Compiler instance. 
+ Compiler* comp; +}; + +#endif // FEATURE_TRACELOGGING diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 3f39b401b6..675be90c17 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -117,12 +117,20 @@ void Compiler::lvaInitTypeRef() info.compILargsCount = info.compArgsCount; +#ifdef FEATURE_SIMD + if (featureSIMD && (info.compRetNativeType == TYP_STRUCT)) + { + var_types structType = impNormStructType(info.compMethodInfo->args.retTypeClass); + info.compRetType = structType; + } +#endif // FEATURE_SIMD + // Are we returning a struct by value? const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo); // Change the compRetNativeType if we are returning a struct by value in a register - if (!hasRetBuffArg && (info.compRetNativeType == TYP_STRUCT)) + if (!hasRetBuffArg && varTypeIsStruct(info.compRetNativeType)) { #ifdef _TARGET_ARM_ // TODO-ARM64-NYI: HFA @@ -145,45 +153,12 @@ void Compiler::lvaInitTypeRef() info.compRetNativeType = getEightByteType(structDesc, 0); } #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING - unsigned size = info.compCompHnd->getClassSize(info.compMethodInfo->args.retTypeClass); - // Check for TYP_STRUCT argument that can fit into a single register - switch (size) - { - case 1: - info.compRetNativeType = TYP_BYTE; - break; - - case 2: - info.compRetNativeType = TYP_SHORT; - break; - -#ifdef _TARGET_64BIT_ - case 4: - info.compRetNativeType = TYP_INT; - break; - - case 8: -#else // 32-bit TARGET - case 3: - case 4: -#endif - // case POINTER_SIZED + var_types argRetType = argOrReturnTypeForStruct(info.compMethodInfo->args.retTypeClass, true /* forReturn */); + info.compRetNativeType = argRetType; + if (argRetType == TYP_UNKNOWN) { - BYTE gcPtr = 0; - info.compCompHnd->getClassGClayout(info.compMethodInfo->args.retTypeClass, &gcPtr); - if (gcPtr == TYPE_GC_NONE) - info.compRetNativeType = TYP_I_IMPL; - else if (gcPtr == TYPE_GC_REF) - info.compRetNativeType = TYP_REF; - else if (gcPtr == TYPE_GC_BYREF) - info.compRetNativeType = TYP_BYREF; - } - break; - - default: assert(!"Unexpected size when returning struct by value"); - break; } #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } @@ -416,7 +391,14 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo) #ifdef FEATURE_SIMD if (featureSIMD) { - checkForSIMDType(varDsc, info.compClassHnd); + var_types simdBaseType = TYP_UNKNOWN; + var_types type = impNormStructType(info.compClassHnd, nullptr, nullptr, &simdBaseType); + if (simdBaseType != TYP_UNKNOWN) + { + assert(varTypeIsSIMD(type)); + varDsc->lvSIMDType = true; + varDsc->lvBaseType = simdBaseType; + } } #endif // FEATURE_SIMD } @@ -728,7 +710,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); } - if (canPassArgInRegisters) + if (canPassArgInRegisters) { /* Another register argument */ @@ -737,11 +719,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) // to the stack happens. 
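// Editor's note: the hand-written size switch removed from lvaInitTypeRef above
// is now centralized in argOrReturnTypeForStruct (whose body is not part of this
// diff). Based on the removed code, the single-register struct-return mapping
// looks roughly like this sketch; the GC layout decides between native int,
// object reference and byref for the pointer-sized case (illustrative names).
enum class RetKind { Byte, Short, Int, NativeInt, ObjectRef, ByRef, NeedsRetBuf };
enum class GcSlot  { None, Ref, ByRef };

static RetKind SingleRegStructReturnKind(unsigned size, unsigned pointerSize, GcSlot gc)
{
    if (size == 1) return RetKind::Byte;
    if (size == 2) return RetKind::Short;
    if ((size == 4) && (pointerSize == 8)) return RetKind::Int;  // sub-pointer-sized on 64-bit
    if (size <= pointerSize)                                     // pointer-sized: 3/4 on x86, 8 on 64-bit
    {
        if (gc == GcSlot::Ref)   return RetKind::ObjectRef;
        if (gc == GcSlot::ByRef) return RetKind::ByRef;
        return RetKind::NativeInt;
    }
    return RetKind::NeedsRetBuf;                                 // larger structs keep the return buffer
}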
unsigned firstAllocatedRegArgNum = 0; +#if FEATURE_MULTIREG_STRUCT_ARGS + varDsc->lvOtherArgReg = REG_NA; +#endif + #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) unsigned secondAllocatedRegArgNum = 0; var_types firstEightByteType = TYP_UNDEF; var_types secondEightByteType = TYP_UNDEF; - varDsc->lvOtherArgReg = REG_NA; if (argType == TYP_STRUCT) { @@ -768,9 +753,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) varDsc->lvIsRegArg = 1; -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_STRUCT_ARGS if (argType == TYP_STRUCT) { +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType); // If there is a second eightbyte, get a register for it too and map the arg to the reg number. @@ -785,9 +771,19 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType); varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this); } +#else // ARM32 or ARM64 + varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL); +#ifdef _TARGET_ARM64_ + if (cSlots == 2) + { + varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum+1, TYP_I_IMPL); + varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this); + } +#endif // _TARGET_ARM64_ +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } else -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) +#endif // FEATURE_MULTIREG_STRUCT_ARGS { varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType); } @@ -1046,6 +1042,14 @@ void Compiler::lvaInitVarArgsHandle(InitVarDscInfo * varDscInfo) } #endif // DEBUG } +#ifndef LEGACY_BACKEND + else + { + // For the RyuJIT backend, we need to mark these as being on the stack, + // as this is not done elsewhere in the case that canEnreg returns false. + varDsc->lvOnFrame = true; + } +#endif // !LEGACY_BACKEND /* Update the total argument size, count and varDsc */ @@ -1116,7 +1120,7 @@ void Compiler::lvaInitVarDsc(LclVarDsc * varDsc, // For an incoming ValueType we better be able to have the full type information // so that we can layout the parameter offsets correctly - if (type == TYP_STRUCT && varDsc->lvVerTypeInfo.IsDead()) + if (varTypeIsStruct(type) && varDsc->lvVerTypeInfo.IsDead()) { BADCODE("invalid ValueType parameter"); } @@ -1149,7 +1153,7 @@ void Compiler::lvaInitVarDsc(LclVarDsc * varDsc, // We can get typeHnds for primitive types, these are value types which only contain // a primitive. We will need the typeHnd to distinguish them, so we store it here. if ((cFlags & CORINFO_FLG_VALUECLASS) && - (type != TYP_STRUCT)) + !varTypeIsStruct(type)) { if (tiVerificationNeeded == false) { @@ -1161,13 +1165,18 @@ void Compiler::lvaInitVarDsc(LclVarDsc * varDsc, varDsc->lvOverlappingFields = StructHasOverlappingFields(cFlags); } - varDsc->lvType = type; // set the lvType (before this point it is TYP_UNDEF) - if (varTypeIsGC(type)) varDsc->lvStructGcCount = 1; - if (type == TYP_STRUCT) + // Set the lvType (before this point it is TYP_UNDEF). + if (type == TYP_STRUCT) + { lvaSetStruct(varNum, typeHnd, typeHnd!=NULL, !tiVerificationNeeded); + } + else + { + varDsc->lvType = type; + } #if OPT_BOOL_OPS if (type == TYP_BOOL) @@ -1545,7 +1554,7 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd, /***************************************************************************** - * Is this TYP_STRUCT local variable promotable? 
*/ + * Is this struct type local variable promotable? */ void Compiler::lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo * StructPromotionInfo) { @@ -1553,7 +1562,7 @@ void Compiler::lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo LclVarDsc * varDsc = &lvaTable[lclNum]; - noway_assert(varDsc->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(varDsc)); noway_assert(!varDsc->lvPromoted); // Don't ask again :) #ifdef FEATURE_SIMD @@ -1586,7 +1595,7 @@ void Compiler::lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo /***************************************************************************** - * Promote a TYP_STRUCT local */ + * Promote a struct type local */ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo * StructPromotionInfo) { @@ -1773,7 +1782,7 @@ void Compiler::lvaPromoteLongVars() unsigned Compiler::lvaGetFieldLocal(LclVarDsc * varDsc, unsigned int fldOffset) { - noway_assert(varDsc->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(varDsc)); noway_assert(varDsc->lvPromoted); for (unsigned i = varDsc->lvFieldLclStart; @@ -1808,7 +1817,7 @@ void Compiler::lvaSetVarAddrExposed(unsigned varNum) if (varDsc->lvPromoted) { - noway_assert(varDsc->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(varDsc)); for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; @@ -1848,7 +1857,7 @@ void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUG_ARG( break; case DNER_IsStruct: JITDUMP("it is a struct\n"); - assert(varDsc->lvType == TYP_STRUCT); + assert(varTypeIsStruct(varDsc)); break; case DNER_BlockOp: JITDUMP("written in a block op\n"); @@ -1891,22 +1900,40 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, boo noway_assert(varNum < lvaCount); LclVarDsc * varDsc = &lvaTable[varNum]; - varDsc->lvType = TYP_STRUCT; - if (setTypeInfo) varDsc->lvVerTypeInfo = typeInfo(TI_STRUCT, typeHnd); - varDsc->lvExactSize = info.compCompHnd->getClassSize(typeHnd); + // Set the type and associated info if we haven't already set it. + if ((varDsc->lvType == TYP_UNDEF) || (varDsc->lvType == TYP_STRUCT)) + { + varDsc->lvType = TYP_STRUCT; - size_t lvSize = varDsc->lvSize(); - assert((lvSize % sizeof(void*)) == 0); // The struct needs to be a multiple of sizeof(void*) bytes for getClassGClayout() to be valid. - varDsc->lvGcLayout = (BYTE*) compGetMemA((lvSize / sizeof(void*)) * sizeof(BYTE), CMK_LvaTable); - unsigned numGCVars = info.compCompHnd->getClassGClayout(typeHnd, varDsc->lvGcLayout); + varDsc->lvExactSize = info.compCompHnd->getClassSize(typeHnd); - // We only save the count of GC vars in a struct up to 7. - if (numGCVars >= 8) - numGCVars = 7; - varDsc->lvStructGcCount = numGCVars; + size_t lvSize = varDsc->lvSize(); + assert((lvSize % sizeof(void*)) == 0); // The struct needs to be a multiple of sizeof(void*) bytes for getClassGClayout() to be valid. + varDsc->lvGcLayout = (BYTE*) compGetMemA((lvSize / sizeof(void*)) * sizeof(BYTE), CMK_LvaTable); + unsigned numGCVars; + var_types simdBaseType = TYP_UNKNOWN; + varDsc->lvType = impNormStructType(typeHnd, varDsc->lvGcLayout, &numGCVars, &simdBaseType); + + // We only save the count of GC vars in a struct up to 7. 
+ if (numGCVars >= 8) + numGCVars = 7; + varDsc->lvStructGcCount = numGCVars; +#if FEATURE_SIMD + if (simdBaseType != TYP_UNKNOWN) + { + assert(varTypeIsSIMD(varDsc)); + varDsc->lvSIMDType = true; + varDsc->lvBaseType = simdBaseType; + } +#endif // FEATURE_SIMD + } + else + { + assert(varDsc->lvExactSize != 0); + } #ifndef _TARGET_64BIT_ bool fDoubleAlignHint = FALSE; @@ -1926,14 +1953,6 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, boo } #endif // not _TARGET_64BIT_ -#ifdef FEATURE_SIMD - // Check to see if this is a SIMD type. - if (featureSIMD && !varDsc->lvIsSIMDType()) - { - checkForSIMDType(varDsc, typeHnd); - } -#endif //FEATURE_SIMD - unsigned classAttribs = info.compCompHnd->getClassAttribs(typeHnd); varDsc->lvOverlappingFields = StructHasOverlappingFields(classAttribs); @@ -1956,7 +1975,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, boo BYTE * Compiler::lvaGetGcLayout(unsigned varNum) { - noway_assert(lvaTable[varNum].lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(lvaTable[varNum].lvType) && (lvaTable[varNum].lvExactSize >= TARGET_POINTER_SIZE)); return lvaTable[varNum].lvGcLayout; } @@ -2575,7 +2594,7 @@ void Compiler::lvaSortByRefCount() varDsc->lvTracked = 0; assert(varDsc->lvType != TYP_STRUCT || varDsc->lvDoNotEnregister); // For structs, should have set this when we set lvAddrExposed. } - else if (varDsc->lvType == TYP_STRUCT) + else if (varTypeIsStruct(varDsc)) { // Promoted structs will never be considered for enregistration anyway, // and the DoNotEnregister flag was used to indicate whether promotion was @@ -2584,7 +2603,7 @@ void Compiler::lvaSortByRefCount() { varDsc->lvTracked = 0; } - else if (!varDsc->lvRegStruct) + else if ((varDsc->lvType == TYP_STRUCT) && !varDsc->lvRegStruct) { lvaSetVarDoNotEnregister(lclNum DEBUG_ARG(DNER_IsStruct)); } @@ -2631,6 +2650,12 @@ void Compiler::lvaSortByRefCount() case TYP_LONG: case TYP_REF: case TYP_BYREF: +#ifdef FEATURE_SIMD + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: + case TYP_SIMD32: +#endif // FEATURE_SIMD case TYP_STRUCT: break; @@ -3016,14 +3041,14 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree) #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING /* Variables must be used as the same type throughout the method */ - noway_assert(tiVerificationNeeded || - varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN || - allowStructs || - genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) || - (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) || - (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) || - (tree->gtFlags & GTF_VAR_CAST) || - varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType)); + noway_assert(tiVerificationNeeded || + varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN || + allowStructs || + genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) || + (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) || + (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) || + (tree->gtFlags & GTF_VAR_CAST) || + varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType)); /* Remember the type of the reference */ @@ -3862,6 +3887,7 @@ void Compiler::lvaFixVirtualFrameOffsets() delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } #endif //_TARGET_AMD64_ + unsigned lclNum; LclVarDsc * varDsc; for (lclNum = 0, varDsc = lvaTable; @@ -4358,7 +4384,10 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize 
/* Argument is passed in a register, don't count it * when updating the current offset on the stack */ -#ifndef _TARGET_ARM_ +#if !defined(_TARGET_ARMARCH_) + // TODO: Remove this noway_assert and replace occurrences of sizeof(void *) with argSize + // Also investigate why we are incrementing argOffs for X86 as this seems incorrect + // #if DEBUG noway_assert(argSize == sizeof(void *)); #endif // DEBUG @@ -4367,12 +4396,11 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize #if defined(_TARGET_X86_) argOffs += sizeof(void *); #elif defined(_TARGET_AMD64_) - // The offset for args needs to be set only for the stack homed arguments for System V. + // Register arguments on AMD64 also take stack space (in the backing store). varDsc->lvStkOffs = argOffs; - // Register arguments also take stack space. argOffs += sizeof(void *); #elif defined(_TARGET_ARM64_) - // Register arguments don't take stack space. + // Register arguments on ARM64 only take stack space when they have a frame home. #elif defined(_TARGET_ARM_) // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg // in the prolog, so we have to fill in lvStkOffs here @@ -4591,7 +4619,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs; } } - // For an independent promoted struct field we also assign the parent struct stack offset + // For an independent promoted struct field we also assign the parent struct stack offset else if (varDsc->lvIsStructField) { noway_assert(varDsc->lvParentLcl < lvaCount); @@ -4920,7 +4948,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() the promotion type is PROMOTION_TYPE_FIELD_DEPENDENT. */ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc)) - { + { continue; } @@ -5105,6 +5133,18 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Reserve the stack space for this variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs); +#ifdef _TARGET_ARM64_ + // If we have an incoming register argument that has a struct promoted field + // then we need to copy the lvStkOffs (the stack home) from the reg arg to the field lclvar + // + if (varDsc->lvIsRegArg && varDsc->lvPromotedStruct()) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs; + } +#endif } } @@ -5784,7 +5824,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t // The register or stack location field is 11 characters wide. 
if (varDsc->lvRefCnt == 0) { - printf("zero ref "); + printf("zero-ref "); } else if (varDsc->lvRegister != 0) { @@ -5793,7 +5833,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t } else if (varDsc->lvOnFrame == 0) { - printf("multi reg "); + printf("multi-reg "); } else { @@ -5822,7 +5862,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t { printf(" do-not-enreg["); if (varDsc->lvAddrExposed) printf("X"); - if (varDsc->lvType == TYP_STRUCT) printf("S"); + if (varTypeIsStruct(varDsc)) printf("S"); if (varDsc->lvVMNeedsStackAddr) printf("V"); if (varDsc->lvLiveInOutOfHndlr) printf("H"); if (varDsc->lvLclFieldExpr) printf("F"); @@ -5836,6 +5876,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t if (varDsc->lvMustInit) printf(" must-init"); if (varDsc->lvAddrExposed) printf(" addr-exposed"); + if (varDsc->lvHasLdAddrOp) printf(" ld-addr-op"); if (varDsc->lvVerTypeInfo.IsThisPtr()) printf(" this"); if (varDsc->lvPinned) printf(" pinned"); if (varDsc->lvRefAssign) printf(" ref-asgn"); @@ -5848,7 +5889,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t if (compGSReorderStackLayout && !varDsc->lvRegister) { if (varDsc->lvIsPtr) printf(" ptr"); - if (varDsc->lvIsUnsafeBuffer) printf(" unsafe_buffer"); + if (varDsc->lvIsUnsafeBuffer) printf(" unsafe-buffer"); } if (varDsc->lvIsStructField) { @@ -6342,132 +6383,6 @@ void Compiler::lvaStressLclFld() fgWalkAllTreesPre(lvaStressLclFldCB, &Args); } -/***************************************************************************** - * - * Callback for fgWalkAllTreesPre() - * Convert as many TYP_INT locals to TYP_DOUBLE. Hopefully they will get - * enregistered on the FP stack. - */ - -/* static */ -Compiler::fgWalkResult Compiler::lvaStressFloatLclsCB(GenTreePtr *pTree, fgWalkData *data) -{ - Compiler * pComp = data->compiler; - GenTreePtr tree = *pTree; - genTreeOps oper = tree->OperGet(); - GenTreePtr lcl; - - switch (oper) - { - case GT_LCL_VAR: - if (tree->gtFlags & GTF_VAR_DEF) - return WALK_CONTINUE; - - lcl = tree; - break; - - case GT_ASG: - if (tree->gtOp.gtOp1->gtOper != GT_LCL_VAR) - return WALK_CONTINUE; - lcl = tree->gtOp.gtOp1; - break; - - default: - assert(!tree->OperIsAssignment()); // Only GT_ASG expected at this time - return WALK_CONTINUE; - } - - unsigned lclNum = lcl->gtLclVarCommon.gtLclNum; - LclVarDsc * varDsc = &pComp->lvaTable[lclNum]; - - if (varDsc->lvIsParam || - varDsc->lvType != TYP_INT || - varDsc->lvAddrExposed || - varDsc->lvSafeAddrTaken || // We will convert a GT_IND(GT_ADDR(lcl)) to "lcl", but until the GT_ADDR node gets folded away, it gets confused if we change "lcl". - varDsc->lvKeepType) - { - return WALK_CONTINUE; - } - - // Leave some TYP_INTs unconverted for variety - if ((lclNum % 4) == 0) - return WALK_CONTINUE; - - // Mark it - - varDsc->lvDblWasInt = true; - pComp->compFloatingPointUsed = true; - - if (tree == lcl) - { - tree->ChangeOper(GT_COMMA); - tree->gtOp.gtOp1 = pComp->gtNewNothingNode(); - tree->gtOp.gtOp2 = pComp->gtNewCastNodeL(TYP_INT, - pComp->gtNewLclvNode(lclNum, TYP_DOUBLE), - TYP_INT); - - return WALK_SKIP_SUBTREES; - } - else - { - noway_assert(oper == GT_ASG && (lcl->gtFlags & GTF_VAR_DEF)); - bool specialCaseCallReturn = false; -#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM_) - // On amd64 and arm, we may have situations where we assign a TYP_STRUCT return to a TYP_INT - // temp. 
Eventually the call will be bashed to TYP_INT, but that happens in fgMorph() which - // is called after the stress morphing. - // (see the comment in impReturnInstruction() that begins with "This is a bit of a workaround..."). - if (tree->gtOp.gtOp2->OperGet() == GT_CALL && - tree->gtOp.gtOp2->TypeGet() == TYP_STRUCT && - tree->gtOp.gtOp2->gtCall.gtReturnType == TYP_INT) - { - specialCaseCallReturn = true; - } -#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM_) - noway_assert(genActualType(tree->gtOp.gtOp2->gtType) == TYP_INT || - genActualType(tree->gtOp.gtOp2->gtType) == TYP_BYREF || - specialCaseCallReturn); - tree->gtOp.gtOp2 = pComp->gtNewCastNode(TYP_DOUBLE, - tree->gtOp.gtOp2, - TYP_DOUBLE); - tree->gtType = - lcl->gtType = TYP_DOUBLE; - - return WALK_CONTINUE; - } - -} - -/*****************************************************************************/ - -void Compiler::lvaStressFloatLcls() -{ - if (!compStressCompile(STRESS_ENREG_FP, 15)) - return; - - // Change the types of all the TYP_INT local variable nodes - - fgWalkAllTreesPre(lvaStressFloatLclsCB, (void*)this); - - // Also, change lvaTable accordingly - - for (unsigned lcl = 0; lcl < lvaCount; lcl++) - { - LclVarDsc * varDsc = &lvaTable[lcl]; - - if (varDsc->lvIsParam || - varDsc->lvType != TYP_INT || - varDsc->lvAddrExposed) - { - noway_assert(!varDsc->lvDblWasInt); - continue; - } - - if (varDsc->lvDblWasInt) - varDsc->lvType = TYP_DOUBLE; - } -} - /*****************************************************************************/ #endif // DEBUG /***************************************************************************** diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp index dddb45fa28..ee750097b4 100644 --- a/src/jit/liveness.cpp +++ b/src/jit/liveness.cpp @@ -34,6 +34,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon *tree, GenTree * noway_assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_LCL_FLD || + tree->gtOper == GT_LCL_FLD_ADDR || tree->gtOper == GT_STORE_LCL_VAR || tree->gtOper == GT_STORE_LCL_FLD); @@ -43,7 +44,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon *tree, GenTree * } else { - noway_assert(tree->gtOper == GT_LCL_FLD || tree->gtOper == GT_STORE_LCL_FLD); + noway_assert(tree->gtOper == GT_LCL_FLD || tree->gtOper == GT_LCL_FLD_ADDR || tree->gtOper == GT_STORE_LCL_FLD); lclNum = tree->gtLclFld.gtLclNum; } @@ -73,7 +74,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon *tree, GenTree * (tree != asgdLclVar) ) { /* bingo - we have an x = f(x) case */ - noway_assert(lvaTable[lhsLclNum].lvType != TYP_STRUCT || lvaTable[lhsLclNum].lvIsSIMDType()); + noway_assert(lvaTable[lhsLclNum].lvType != TYP_STRUCT); asgdLclVar->gtFlags |= GTF_VAR_USEDEF; rhsUSEDEF = true; } @@ -139,7 +140,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon *tree, GenTree * } } } - else if (varDsc->lvType == TYP_STRUCT) + else if (varTypeIsStruct(varDsc)) { noway_assert(!varDsc->lvTracked); @@ -161,10 +162,16 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon *tree, GenTree * } } - // Mark as used any struct fields that are not yet defined. - if (!VarSetOps::IsSubset(this, bitMask, fgCurDefSet)) + // For pure defs (i.e. not an "update" def which is also a use), add to the (all) def set. 
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0 && + (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0) { - VarSetOps::UnionD(this, fgCurUseSet, bitMask); + VarSetOps::UnionD(this, fgCurDefSet, bitMask); + } + else if (!VarSetOps::IsSubset(this, bitMask, fgCurDefSet)) + { + // Mark as used any struct fields that are not yet defined. + VarSetOps::UnionD(this, fgCurUseSet, bitMask); } } } @@ -428,6 +435,8 @@ GenTreePtr Compiler::fgPerStatementLocalVarLiveness(GenTreePtr startNode, // T case GT_LCL_VAR: case GT_LCL_FLD: + case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: case GT_STORE_LCL_VAR: case GT_STORE_LCL_FLD: fgMarkUseDef(tree->AsLclVarCommon(), lhsNode); @@ -1071,7 +1080,7 @@ void Compiler::fgExtendDbgLifetimes() for (unsigned i = 0; i < lvaCount; i++) { LclVarDsc* varDsc = &lvaTable[i]; - if (varDsc->lvType == TYP_STRUCT && varDsc->lvTracked) + if (varTypeIsStruct(varDsc) && varDsc->lvTracked) { VarSetOps::AddElemD(this, noUnmarkVars, varDsc->lvVarIndex); } @@ -1587,7 +1596,7 @@ VARSET_VALRET_TP Compiler::fgUpdateLiveSet(VARSET_VALARG_TP liveSet, // We maintain the invariant that if the lclVarTree is a promoted struct, but the // the lookup fails, then all the field vars (i.e., "varBits") are dying. VARSET_TP* deadVarBits = NULL; - if (lclVarTree->TypeGet() == TYP_STRUCT && + if (varTypeIsStruct(lclVarTree) && GetPromotedStructDeathVars()->Lookup(lclVarTree, &deadVarBits)) { VarSetOps::DiffD(this, newLiveSet, *deadVarBits); @@ -1938,26 +1947,36 @@ SKIP_QMARK: #endif // INLINE_NDIRECT } - /* Is this a use/def of a local variable? */ - /* We will consider LDOBJ(ADDR(LCL[x])) to be the same as the use of x (and - any constituent field locals if x is a promoted struct local) -- since - the LDOBJ may be require an implementation that might itself allocate registers, - the variable(s) should stay live until the end of the LDOBJ. - (These tests probably don't need to be ARM-specific, FWIW.) - */ - - GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(tree); - - // The method above returns nullptr if the tree is - // not an indir(addr(local)) so we assign it back to the - // original tree. + // Is this a use/def of a local variable? +#ifdef LEGACY_BACKEND + // Generally, the last use information is associated with the lclVar node. + // However, for LEGACY_BACKEND, the information must be associated + // with the LDOBJ itself for promoted structs. + // In that case, the LDOBJ may be require an implementation that might itself allocate registers, + // so the variable(s) should stay live until the end of the LDOBJ. + // Note that for promoted structs lvTracked is false. + + GenTreePtr lclVarTree = nullptr; + if (tree->gtOper == GT_LDOBJ) + { + // fgIsIndirOfAddrOfLocal returns nullptr if the tree is + // not an indir(addr(local)), in which case we will set lclVarTree + // back to the original tree, and not handle it as a use/def. + lclVarTree = fgIsIndirOfAddrOfLocal(tree); + if ((lclVarTree != nullptr) && + lvaTable[lclVarTree->gtLclVarCommon.gtLclNum].lvTracked) + { + lclVarTree = nullptr; + } + } if (lclVarTree == nullptr) { lclVarTree = tree; } - - // If lclVarTree is different than tree means we found an indir(addr(lclVar)) case. - if (tree->OperIsNonPhiLocal() || lclVarTree != tree) +#else // !LEGACY_BACKEND + GenTreePtr lclVarTree = tree; +#endif // !LEGACY_BACKEND + if (lclVarTree->OperIsNonPhiLocal() || lclVarTree->OperIsLocalAddr()) { lclNum = lclVarTree->gtLclVarCommon.gtLclNum; @@ -1975,7 +1994,7 @@ SKIP_QMARK: /* Is this a definition or use? 
*/ - if (tree->gtFlags & GTF_VAR_DEF) + if (lclVarTree->gtFlags & GTF_VAR_DEF) { /* The variable is being defined here. The variable @@ -1994,7 +2013,7 @@ SKIP_QMARK: { /* The variable is live */ - if ((tree->gtFlags & GTF_VAR_USEASG) == 0) + if ((lclVarTree->gtFlags & GTF_VAR_USEASG) == 0) { /* Mark variable as dead from here to its closest use */ @@ -2017,7 +2036,7 @@ SKIP_QMARK: else { /* Dead assignment to the variable */ - tree->gtFlags |= GTF_VAR_DEATH; + lclVarTree->gtFlags |= GTF_VAR_DEATH; if (opts.MinOpts()) continue; @@ -2041,16 +2060,11 @@ SKIP_QMARK: } else // it is a use { - // For uses, we only process the lv directly (once), not an IND(ADDR(local)). - // This situation is true if the lclVarTree is different from the tree. - // TODO-Cleanup: Understand why we have to skip these uses, and describe it here. - if (lclVarTree != tree) - continue; - // Is the variable already known to be alive? if (VarSetOps::IsMember(this, life, varIndex)) { - lclVarTree->gtFlags &= ~GTF_VAR_DEATH; // Since we may call this multiple times, clear the GTF_VAR_DEATH if set. + // Since we may do liveness analysis multiple times, clear the GTF_VAR_DEATH if set. + lclVarTree->gtFlags &= ~GTF_VAR_DEATH; continue; } @@ -2077,7 +2091,7 @@ SKIP_QMARK: } // Note that promoted implies not tracked (i.e. only the fields are tracked). - else if (varDsc->lvType == TYP_STRUCT) + else if (varTypeIsStruct(varDsc->lvType)) { noway_assert(!varDsc->lvTracked); @@ -2104,6 +2118,13 @@ SKIP_QMARK: VarSetOps::AddElemD(this, varBit, varIndex); } } + if (tree->gtFlags & GTF_VAR_DEF) + { + VarSetOps::DiffD(this, varBit, keepAliveVars); + VarSetOps::DiffD(this, life, varBit); + continue; + } + // This is a use. // Are the variables already known to be alive? if (VarSetOps::IsSubset(this, varBit, life)) @@ -2229,7 +2250,7 @@ SKIP_QMARK: // fgRemoveDeadStore - remove a store to a local which has no exposed uses. // -// pTree - GenTree** to an assign node (pre-rationalize) or store-form local (post-rationalize) +// pTree - GenTree** to local, including store-form local or local addr (post-rationalize) // varDsc - var that is being stored to // life - current live tracked vars (maintained as we walk backwards) // doAgain - out parameter, true if we should restart the statement @@ -2239,75 +2260,88 @@ SKIP_QMARK: bool Compiler::fgRemoveDeadStore(GenTree** pTree, LclVarDsc* varDsc, VARSET_TP life, bool *doAgain, bool* pStmtInfoDirty DEBUGARG(bool* treeModf)) { - GenTree* asgNode; - GenTree* rhsNode; + GenTree* asgNode = nullptr; + GenTree* rhsNode = nullptr; + GenTree* addrNode = nullptr; GenTree* const tree = *pTree; - if (tree->OperIsStore()) + GenTree* nextNode = tree->gtNext; + + // First, characterize the lclVarTree and see if we are taking its address. + if (tree->OperIsLocalStore()) { + rhsNode = tree->gtOp.gtOp1; asgNode = tree; - if (tree->gtOper == GT_STOREIND) - { - rhsNode = asgNode->gtOp.gtOp2; - } - else - { - rhsNode = asgNode->gtOp.gtOp1; - } - -#ifndef LEGACY_BACKEND - // TODO-CQ: correct liveness for structs that are not address-exposed. - // currently initblk and cpblk are not handled - if (tree->gtOper == GT_STORE_LCL_FLD) - { - return false; - } -#endif // !LEGACY_BACKEND } - else if (tree->TypeGet() == TYP_STRUCT) + else if(tree->OperIsLocal()) { - // This lclVar is a dead struct def. We expect it to be defined as part of a GT_COPYBLK - // tree with the following structure: - // - // /--rhsNode This is the rhs of the struct (BEFORE 'tree', so we won't traverse this). 
- // | /--tree This is lclVar node we're looking at. - // +--addrNode This is the GT_ADDR of 'tree'. - // /--listNode - // +--sizeNode - // asgNode The GT_COPYBLK which IS the assignment. - // - // TODO-CQ: consider adding the post-rationalizer case where addrNode and tree subtree can be a GT_LCL_VAR_ADDR. - // - // If it has this structure, then we can consider it just like a regular assignment, and can - // eliminate it if rhsNode is side-effect free. - - assert((tree->gtNext->gtFlags & GTF_ASG) == 0); - GenTree* addrNode = tree->gtNext; - if (addrNode == nullptr || addrNode->OperGet() != GT_ADDR) + if (nextNode == nullptr) { return false; } - GenTree* listNode = addrNode->gtNext; - if (listNode == nullptr || listNode->OperGet() != GT_LIST || listNode->gtGetOp1() != addrNode) + if (nextNode->OperGet() == GT_ADDR) { - return false; + addrNode = nextNode; + nextNode = nextNode->gtNext; } - GenTree* sizeNode = listNode->gtNext; - if (sizeNode == nullptr || sizeNode->OperGet() != GT_CNS_INT) + } + else + { + assert(tree->OperIsLocalAddr()); + addrNode = tree; + } + + // Next, find the assignment. + if (asgNode == nullptr) + { + if (addrNode == nullptr) { - return false; + asgNode = nextNode; } - asgNode = sizeNode->gtNext; - if (!asgNode->OperIsBlkOp()) + else if (asgNode == nullptr) { - return false; + // This may be followed by GT_IND/assign, GT_STOREIND or GT_LIST/block-op. + if (nextNode == nullptr) + { + return false; + } + switch(nextNode->OperGet()) + { + default: + break; + case GT_IND: + asgNode = nextNode->gtNext; + break; + case GT_STOREIND: + asgNode = nextNode; + break; + case GT_LIST: + { + GenTree* sizeNode = nextNode->gtNext; + if ((sizeNode == nullptr) || (sizeNode->OperGet() != GT_CNS_INT)) + { + return false; + } + asgNode = sizeNode->gtNext; + rhsNode = nextNode->gtGetOp2(); + } + break; + } } - rhsNode = listNode->gtGetOp2(); } - else + + if (asgNode == nullptr) { - asgNode = tree->gtNext; - rhsNode = asgNode->gtOp.gtOp2; + return false; + } + + if (asgNode->OperIsAssignment()) + { + rhsNode = asgNode->gtGetOp2(); + } + else if (rhsNode == nullptr) + { + return false; } if (asgNode && (asgNode->gtFlags & GTF_ASG)) diff --git a/src/jit/loopcloning.h b/src/jit/loopcloning.h index ac7ed0cc7c..1e74789d27 100644 --- a/src/jit/loopcloning.h +++ b/src/jit/loopcloning.h @@ -521,13 +521,13 @@ struct LC_Deref void Print(unsigned indent = 0) { unsigned tab = 4 * indent; - JITDUMP("%*s%d,%d => {", tab, "", Lcl(), level); + printf("%*s%d,%d => {", tab, "", Lcl(), level); if (children != nullptr) { for (unsigned i = 0; i < children->Size(); ++i) { - if (i > 0) { JITDUMP(","); } - JITDUMP("\n"); + if (i > 0) { printf(","); } + printf("\n"); #ifdef _MSC_VER (*children)[i]->Print(indent + 1); #else // _MSC_VER @@ -535,7 +535,7 @@ struct LC_Deref #endif // _MSC_VER } } - JITDUMP("\n%*s}", tab, ""); + printf("\n%*s}", tab, ""); } #endif }; diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 7a67e3f734..3dad01a71b 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -134,6 +134,24 @@ Compiler::fgWalkResult Lowering::LowerNodeHelper(GenTreePtr* pTree, Compiler::fg return Compiler::WALK_CONTINUE; } +#if !defined(_TARGET_64BIT_) +genTreeOps getHiOper(genTreeOps oper) +{ + switch(oper) + { + case GT_ADD: return GT_ADD_HI; break; + case GT_SUB: return GT_SUB_HI; break; + case GT_MUL: return GT_MUL_HI; break; + case GT_DIV: return GT_DIV_HI; break; + case GT_MOD: return GT_MOD_HI; break; + case GT_OR: return GT_OR; break; + case GT_AND: return GT_AND; break; + case GT_XOR: return 
GT_XOR; break; + } + assert(!"getHiOper called for invalid oper"); + return GT_NONE; +} +#endif // !defined(_TARGET_64BIT_) //------------------------------------------------------------------------ // DecomposeNode: Decompose long-type trees into lower & upper halves. @@ -265,6 +283,7 @@ void Lowering::DecomposeNode(GenTreePtr* pTree, Compiler::fgWalkData* data) unsigned hiVarNum = loVarNum + 1; tree->AsLclVarCommon()->SetLclNum(loVarNum); hiStore->SetOper(GT_STORE_LCL_VAR); + hiStore->AsLclVarCommon()->SetLclNum(hiVarNum); } else { @@ -370,6 +389,7 @@ void Lowering::DecomposeNode(GenTreePtr* pTree, Compiler::fgWalkData* data) GenTree* hiOp1 = op1->gtGetOp2(); comp->fgSnipNode(curStmt, op1); loResult = tree; + loResult->gtType = TYP_INT; loResult->gtOp.gtOp1 = loOp1; loOp1->gtNext = loResult; loResult->gtPrev = loOp1; @@ -382,15 +402,49 @@ void Lowering::DecomposeNode(GenTreePtr* pTree, Compiler::fgWalkData* data) case GT_NEG: NYI("GT_NEG of TYP_LONG"); break; - // Binary operators whose long result is simply the concatenation of the int result - // on its constituent halves: + // Binary operators. Those that require different computation for upper and lower half are + // handled by the use of getHiOper(). + case GT_ADD: case GT_OR: case GT_XOR: case GT_AND: - NYI("Logical binary operators on TYP_LONG"); + { + NYI_IF((tree->gtFlags & GTF_REVERSE_OPS) != 0, "Binary operator with GTF_REVERSE_OPS"); + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + // Both operands must have already been decomposed into GT_LONG operators. + noway_assert((op1->OperGet() == GT_LONG) && (op2->OperGet() == GT_LONG)); + // Capture the lo and hi halves of op1 and op2. + GenTree* loOp1 = op1->gtGetOp1(); + GenTree* hiOp1 = op1->gtGetOp2(); + GenTree* loOp2 = op2->gtGetOp1(); + GenTree* hiOp2 = op2->gtGetOp2(); + // Now, remove op1 and op2 from the node list. + comp->fgSnipNode(curStmt, op1); + comp->fgSnipNode(curStmt, op2); + // We will reuse "tree" for the loResult, which will now be of TYP_INT, and its operands + // will be the lo halves of op1 from above. + loResult = tree; + loResult->gtType = TYP_INT; + loResult->gtOp.gtOp1 = loOp1; + loResult->gtOp.gtOp2 = loOp2; + // The various halves will be correctly threaded internally. We simply need to + // relink them into the proper order, i.e. loOp1 is followed by loOp2, and then + // the loResult node. + // (This rethreading, and that below, are where we need to address the reverse ops case). + loOp1->gtNext = loOp2; + loOp2->gtPrev = loOp1; + loOp2->gtNext = loResult; + loResult->gtPrev = loOp2; + + // We will now create a new tree for the hiResult, and then thread these nodes as above. + hiResult = new (comp, oper) GenTreeOp(getHiOper(oper), TYP_INT, hiOp1, hiOp2); + hiOp1->gtNext = hiOp2; + hiOp2->gtPrev = hiOp1; + hiOp2->gtNext = hiResult; + hiResult->gtPrev = hiOp2; + } break; - // Binary operators whose upper and lower halves require different computation. - case GT_ADD: case GT_SUB: case GT_MUL: case GT_DIV: @@ -1070,9 +1124,8 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP if (!isOnStack) { #ifdef FEATURE_SIMD - // We can have SIMD types that are handled as TYP_DOUBLE, but which need to be - // passed in integer registers. We need the putArg node to be of the int type. - if (type == TYP_DOUBLE && genIsValidIntReg(fp->regNum)) + // TYP_SIMD8 is passed in an integer register. We need the putArg node to be of the int type. 
+ if (type == TYP_SIMD8 && genIsValidIntReg(fp->regNum)) { type = TYP_LONG; } @@ -1658,7 +1711,11 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call) if (op1->OperGet() == GT_LCL_VAR_ADDR) { unsigned lclNum = op1->AsLclVarCommon()->GetLclNum(); - if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT) + // TODO-1stClassStructs: This is here to duplicate previous behavior, + // but is not needed because the scenario being quirked did not involve + // a SIMD or enregisterable struct. + // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet())) { // First arg is addr of a struct local. paddingNeeded = true; @@ -1807,7 +1864,7 @@ void Lowering::LowerFastTailCall(GenTreeCall *call) // a method returns. This is a case of caller method has both PInvokes and tail calls. if (comp->info.compCallUnmanaged) { - InsertPInvokeMethodEpilog(comp->compCurBB); + InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call)); } #endif @@ -2048,7 +2105,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget // a method returns. This is a case of caller method has both PInvokes and tail calls. if (comp->info.compCallUnmanaged) { - InsertPInvokeMethodEpilog(comp->compCurBB); + InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call)); } #endif @@ -2117,7 +2174,7 @@ void Lowering::LowerJmpMethod(GenTree* jmp) // a method returns. if (comp->info.compCallUnmanaged) { - InsertPInvokeMethodEpilog(comp->compCurBB); + InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp)); } #endif } @@ -2135,7 +2192,7 @@ void Lowering::LowerRet(GenTree* ret) // Method doing PInvokes has exactly one return block unless it has tail calls. if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB)) { - InsertPInvokeMethodEpilog(comp->compCurBB); + InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret)); } #endif } @@ -2365,7 +2422,7 @@ GenTree* Lowering::SetGCState(int state) GenTree* base = new(comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1); GenTree* storeGcState = new(comp, GT_STOREIND) - GenTreeIndir(GT_STOREIND, TYP_BYTE, + GenTreeStoreInd(TYP_BYTE, new(comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState), @@ -2404,9 +2461,7 @@ GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action) data = new(comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink); } - GenTree* storeInd = new(comp, GT_STOREIND) - GenTreeIndir(GT_STOREIND, TYP_I_IMPL, addr, data); - + GenTree* storeInd = new(comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data); return storeInd; } @@ -2486,27 +2541,65 @@ void Lowering::InsertPInvokeMethodProlog() } } -// code that needs to be run when exiting any method that has pinvoke inlines +// Code that needs to be run when exiting any method that has pinvoke inlines // this needs to be inserted any place you can exit the function: returns, tailcalls and jmps -void Lowering::InsertPInvokeMethodEpilog(BasicBlock *returnBB) +// +// Parameters +// returnBB - basic block from which a method can return +// lastExpr - Gentree of the last top level stmnt of returnBB (debug only arg) +void Lowering::InsertPInvokeMethodEpilog(BasicBlock *returnBB + DEBUGARG(GenTreePtr lastExpr) ) { assert(returnBB != nullptr); assert(comp->info.compCallUnmanaged); // Method doing Pinvoke calls has exactly one return block unless it has "jmp" or tail calls. 
- assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) || returnBB->endsWithTailCallOrJmp(comp)); +#ifdef DEBUG + bool endsWithTailCallOrJmp = false; +#if FEATURE_FASTTAILCALL + endsWithTailCallOrJmp = returnBB->endsWithTailCallOrJmp(comp); +#endif // FEATURE_FASTTAILCALL + assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) || endsWithTailCallOrJmp); +#endif // DEBUG + + GenTreeStmt* lastTopLevelStmt = comp->fgGetLastTopLevelStmt(returnBB)->AsStmt(); + GenTreePtr lastTopLevelStmtExpr = lastTopLevelStmt->gtStmtExpr; + + // GenTree of the last top-level statement should match. + assert(lastTopLevelStmtExpr == lastExpr); + // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution order + // so that it is guaranteed that there will be no further PInvokes after that point in the method. + // + // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be + // Op1, PME, GT_RETURN + // + // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be + // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL + // After inserting PME execution order would be: + // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL + // + // Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP + // That is after PME, args for GT_JMP call will be set up. + + // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a PInvoke + // call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant, it is harmless. + // Note that liveness is artificially extending the life of compLvFrameListRoot var if the method being compiled has + // pinvokes. Deleting the below statement would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot + // will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for x64 case to properly + // extend the life of compLvFrameListRoot var. + // // Thread.offsetOfGcState = 0/1 // That is [tcb + offsetOfGcState] = 1 GenTree* storeGCState = SetGCState(1); - comp->fgInsertStmtNearEnd(returnBB, LowerMorphAndSeqTree(storeGCState)); + comp->fgInsertTreeBeforeAsEmbedded(storeGCState, lastTopLevelStmtExpr, lastTopLevelStmt, returnBB); if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB) { // Pop the frame, in non-stubs we do this around each pinvoke call GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame); - comp->fgInsertStmtNearEnd(returnBB, LowerMorphAndSeqTree(frameUpd)); + comp->fgInsertTreeBeforeAsEmbedded(frameUpd, lastTopLevelStmtExpr, lastTopLevelStmt, returnBB); } } @@ -3142,9 +3235,6 @@ void Lowering::LowerInd(GenTreePtr* pTree) { GenTreePtr newNode = NULL; GenTreePtr cTree = *pTree; - GenTreePtr base, index; - unsigned scale, offset; - bool rev; JITDUMP("\n"); DISPNODE(cTree); @@ -3157,6 +3247,12 @@ void Lowering::LowerInd(GenTreePtr* pTree) LowerAddrMode(&cTree->gtOp.gtOp1, before, nullptr, true); + // Mark all GT_STOREIND nodes to indicate that it is not known + // whether they represent a RMW memory op. 
+ if (cTree->OperGet() == GT_STOREIND) + { + cTree->AsStoreInd()->SetRMWStatusDefault(); + } } //------------------------------------------------------------------------ @@ -3614,7 +3710,7 @@ void Lowering::DoPhase() * * This is a first iteration to actually recognize trees that can be code-generated * as a single read-modify-write instruction on AMD64/x86. For now - * this method only supports the recognition of simple addresing modes (through GT_LEA) + * this method only supports the recognition of simple addressing modes (through GT_LEA) * or local var indirections. Local fields, array access and other more complex nodes are * not yet supported. * @@ -3625,9 +3721,18 @@ bool Lowering::IndirsAreEquivalent(GenTreePtr candidate, GenTreePtr storeInd) { assert(candidate->OperGet() == GT_IND); assert(storeInd->OperGet() == GT_STOREIND); + + // We should check the size of the indirections. If they are + // different, say because of a cast, then we can't call them equivalent. Doing so could cause us + // to drop a cast. + // Signed-ness difference is okay and expected since a store indirection must always + // be signed based on the CIL spec, but a load could be unsigned. + if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType)) + return false; + GenTreePtr pTreeA = candidate->gtGetOp1(); GenTreePtr pTreeB = storeInd->gtGetOp1(); - + // This method will be called by codegen (as well as during lowering). // After register allocation, the sources may have been spilled and reloaded // to a different register, indicated by an inserted GT_RELOAD node. @@ -3640,14 +3745,12 @@ bool Lowering::IndirsAreEquivalent(GenTreePtr candidate, GenTreePtr storeInd) if (pTreeA->OperGet() != pTreeB->OperGet()) return false; - if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType)) - return false; - oper = pTreeA->OperGet(); switch (oper) { case GT_LCL_VAR: - case GT_CLS_VAR_ADDR: + case GT_LCL_VAR_ADDR: + case GT_CLS_VAR_ADDR: case GT_CNS_INT: return NodesAreEquivalentLeaves(pTreeA, pTreeB); @@ -3682,8 +3785,8 @@ bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2) tree1 = tree1->gtSkipReloadOrCopy(); tree2 = tree2->gtSkipReloadOrCopy(); - if (tree1->TypeGet() != tree2->TypeGet()) - return false; + if (tree1->TypeGet() != tree2->TypeGet()) + return false; if (tree1->OperGet() != tree2->OperGet()) return false; @@ -3694,11 +3797,13 @@ bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2) switch (tree1->OperGet()) { case GT_CNS_INT: - return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal; + return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal && + tree1->IsIconHandle() == tree2->IsIconHandle(); case GT_LCL_VAR: + case GT_LCL_VAR_ADDR: return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum; - case GT_CLS_VAR_ADDR: - return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd; + case GT_CLS_VAR_ADDR: + return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd; default: return false; } @@ -3793,6 +3898,121 @@ void Lowering::SimpleLinkNodeAfter(GenTree* prevTree, GenTree* newTree) } } + +#ifdef _TARGET_64BIT_ +/** + * Get common information required to handle a cast instruction + * + * Right now only supports 64 bit targets. In order to support 32 bit targets the + * switch statement needs work. 
+ * + */ +void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo) +{ + // Initialize castInfo + memset(castInfo, 0, sizeof(*castInfo)); + + GenTreePtr castOp = treeNode->gtCast.CastOp(); + + var_types dstType = treeNode->CastToType(); + var_types srcType = castOp->TypeGet(); + + castInfo->unsignedDest = varTypeIsUnsigned(dstType); + castInfo->unsignedSource = varTypeIsUnsigned(srcType); + + // If necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set. + if (!castInfo->unsignedSource && (treeNode->gtFlags & GTF_UNSIGNED) != 0) + { + srcType = genUnsignedType(srcType); + castInfo->unsignedSource = true; + } + + if (treeNode->gtOverflow() && (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG))) + { + castInfo->requiresOverflowCheck = true; + } + + if (castInfo->requiresOverflowCheck) + { + ssize_t typeMin = 0; + ssize_t typeMax = 0; + ssize_t typeMask = 0; + bool signCheckOnly = false; + + // Do we need to compare the value, or just check masks? + + switch (dstType) + { + default: + assert(!"unreachable: getCastDescription"); + break; + + case TYP_BYTE: + typeMask = ssize_t((int)0xFFFFFF80); + typeMin = SCHAR_MIN; + typeMax = SCHAR_MAX; + break; + + case TYP_UBYTE: + typeMask = ssize_t((int)0xFFFFFF00L); + break; + + case TYP_SHORT: + typeMask = ssize_t((int)0xFFFF8000); + typeMin = SHRT_MIN; + typeMax = SHRT_MAX; + break; + + case TYP_CHAR: + typeMask = ssize_t((int)0xFFFF0000L); + break; + + case TYP_INT: + if (srcType == TYP_UINT) + { + signCheckOnly = true; + } + else + { + typeMask = 0xFFFFFFFF80000000LL; + typeMin = INT_MIN; + typeMax = INT_MAX; + } + break; + + case TYP_UINT: + if (srcType == TYP_INT) + { + signCheckOnly = true; + } + else + { + typeMask = 0xFFFFFFFF00000000LL; + } + break; + + case TYP_LONG: + signCheckOnly = true; + break; + + case TYP_ULONG: + signCheckOnly = true; + break; + } + + if (signCheckOnly) + { + castInfo->signCheckOnly = true; + } + + castInfo->typeMax = typeMax; + castInfo->typeMin = typeMin; + castInfo->typeMask = typeMask; + } +} + +#endif // _TARGET_64BIT_ + #ifdef DEBUG void Lowering::DumpNodeInfoMap() { diff --git a/src/jit/lower.h b/src/jit/lower.h index 03f7a6e71f..bef01bea88 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -31,6 +31,24 @@ public: } virtual void DoPhase(); + // If requiresOverflowCheck is false, all other values will be unset + struct CastInfo + { + bool requiresOverflowCheck; // Will the cast require an overflow check + bool unsignedSource; // Is the source unsigned + bool unsignedDest; // Is the dest unsigned + + // All other fields are only meaningful if requiresOverflowCheck is set. 
+ + ssize_t typeMin; // Lowest storable value of the dest type + ssize_t typeMax; // Highest storable value of the dest type + ssize_t typeMask; // For converting from/to unsigned + bool signCheckOnly; // For converting between unsigned/signed int + }; + +#ifdef _TARGET_64BIT_ + static void getCastDescription(GenTreePtr treeNode, CastInfo* castInfo); +#endif // _TARGET_64BIT_ private: // Friends @@ -65,7 +83,7 @@ private: void InsertPInvokeCallProlog (GenTreeCall* call); void InsertPInvokeCallEpilog (GenTreeCall* call); void InsertPInvokeMethodProlog(); - void InsertPInvokeMethodEpilog(BasicBlock *returnBB); + void InsertPInvokeMethodEpilog(BasicBlock *returnBB DEBUGARG(GenTreePtr lastExpr)); GenTree *SetGCState(int cns); GenTree *CreateReturnTrapSeq(); enum FrameLinkAction { PushFrame, PopFrame }; @@ -130,6 +148,7 @@ private: void TreeNodeInfoInitCall(GenTreePtr tree, TreeNodeInfo &info, int &srcCount, int &dstCount); #endif // _TARGET_ARM_ void TreeNodeInfoInitStructArg(GenTreePtr structArg); + void TreeNodeInfoInitBlockStore(GenTreeBlkOp* blkNode); #ifdef FEATURE_SIMD void TreeNodeInfoInitSIMD(GenTree* tree, LinearScan* lsra); #endif // FEATURE_SIMD @@ -154,11 +173,14 @@ private: void SetMulOpCounts(GenTreePtr tree); void LowerCmp(GenTreePtr tree); + #if !CPU_LOAD_STORE_ARCH - bool LowerStoreInd(GenTreePtr tree); + bool IsBinOpInRMWStoreInd(GenTreePtr tree); + bool IsRMWMemOpRootedAtStoreInd(GenTreePtr storeIndTree, GenTreePtr *indirCandidate, GenTreePtr *indirOpSource); + bool SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd); #endif void LowerStoreLoc(GenTreeLclVarCommon* tree); - void HandleIndirAddressExpression(GenTree *indirTree, GenTree* tree); + void SetIndirAddrOpCounts(GenTree *indirTree); void LowerGCWriteBarrier(GenTree *tree); void LowerArrElem(GenTree **ppTree, Compiler::fgWalkData* data); void LowerRotate(GenTree *tree); @@ -169,8 +191,6 @@ public: static bool IndirsAreEquivalent (GenTreePtr pTreeA, GenTreePtr pTreeB); private: static bool NodesAreEquivalentLeaves (GenTreePtr candidate, GenTreePtr storeInd); - void SetStoreIndOpCounts (GenTreePtr storeInd, - GenTreePtr indirCandidate); GenTreePtr CreateLocalTempAsg (GenTreePtr rhs, unsigned refCount, GenTreePtr *ppLclVar = nullptr); diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp index a9f6196dc6..e88583a5bb 100644 --- a/src/jit/lowerarm64.cpp +++ b/src/jit/lowerarm64.cpp @@ -47,9 +47,14 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) unsigned varNum = storeLoc->gtLclNum; LclVarDsc* varDsc = comp->lvaTable + varNum; + if (varDsc->lvIsSIMDType()) + { + noway_assert(storeLoc->gtType != TYP_STRUCT); + } + unsigned size = genTypeSize(storeLoc); // If we are storing a constant into a local variable // we extend the size of the store here - if (genTypeSize(storeLoc) < 4) + if ((size < 4) && !varTypeIsStruct(varDsc)) { if (!varTypeIsUnsigned(varDsc)) { @@ -343,10 +348,17 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) break; case GT_MUL: - if (tree->gtOverflow()) + if ((tree->gtFlags & GTF_UNSIGNED) != 0) { + // unsigned mul should only need one register info->internalIntCount = 1; } + else if (tree->gtOverflow()) + { + // Need a register different from target reg to check + // for signed overflow. 
+ info->internalIntCount = 2; + } __fallthrough; case GT_DIV: @@ -359,58 +371,21 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } break; - case GT_MATH: + case GT_INTRINSIC: { - NYI("Math intrinsics"); -#if 0 // TODO-ARM64-NYI - // Right now only Sqrt/Abs are treated as math intrinsics - noway_assert((tree->gtMath.gtMathFN == CORINFO_INTRINSIC_Sqrt) || - (tree->gtMath.gtMathFN == CORINFO_INTRINSIC_Abs)); + // Right now only Abs/Round/Sqrt are treated as math intrinsics + noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) || + (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) || + (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt) ); - // Both operand and its result must be of floating point type. + // Both operand and its result must be of the same floating point type. op1 = tree->gtOp.gtOp1; assert(varTypeIsFloating(op1)); assert(op1->TypeGet() == tree->TypeGet()); info->srcCount = 1; info->dstCount = 1; - - switch (tree->gtMath.gtMathFN) - { - case CORINFO_INTRINSIC_Sqrt: - if (op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl()) - { - MakeSrcContained(tree, op1); - } - break; - - case CORINFO_INTRINSIC_Abs: - // Abs(float x) = x & 0x7fffffff - // Abs(double x) = x & 0x7ffffff ffffffff - - // In case of Abs we need an internal register to hold mask. - - // TODO-ARM64-CQ: avoid using an internal register for the mask. - // Andps or andpd both will operate on 128-bit operands. - // The data section constant to hold the mask is a 64-bit size. - // Therefore, we need both the operand and mask to be in - // xmm register. When we add support in emitter to emit 128-bit - // data constants and instructions that operate on 128-bit - // memory operands we can avoid the need for an internal register. - if (tree->gtMath.gtMathFN == CORINFO_INTRINSIC_Abs) - { - info->internalFloatCount = 1; - info->setInternalCandidates(l, l->internalFloatRegCandidates()); - } - break; - - default: - assert(!"Unsupported math intrinsic"); - unreached(); - break; - } -#endif // 0 } break; @@ -439,33 +414,37 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) { castOpType = genUnsignedType(castOpType); } - +#ifdef DEBUG if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType))) { -#ifdef DEBUG // If converting to float/double, the operand must be 4 or 8 byte in size. if (varTypeIsFloating(castToType)) { unsigned opSize = genTypeSize(castOpType); assert(opSize == 4 || opSize == 8); } + } #endif //DEBUG + // Some overflow checks need a temp reg - // U8 -> R8 conversion requires that the operand be in a register. - if (castOpType != TYP_ULONG) - { - if (castOp->isMemoryOp() || castOp->IsCnsNonZeroFltOrDbl()) - { - MakeSrcContained(tree, castOp); - } - } - } + CastInfo castInfo; - // some overflow checks need a temp reg: - // - GT_CAST from INT64/UINT64 to UINT32 - if (tree->gtOverflow() && (castToType == TYP_UINT)) + // Get information about the cast. + getCastDescription(tree, &castInfo); + + if (castInfo.requiresOverflowCheck) { - if (genTypeSize(castOpType) == 8) + var_types srcType = castOp->TypeGet(); + emitAttr cmpSize = EA_ATTR(genTypeSize(srcType)); + + // If we cannot store the comparisons in an immediate for either + // comparing against the max or min value, then we will need to + // reserve a temporary register. 
+ + bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize); + bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize); + + if (!canStoreMaxValue || !canStoreMinValue) { info->internalIntCount = 1; } @@ -634,21 +613,27 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) if (argNode->TypeGet() == TYP_STRUCT) { + GenTreePtr actualArgNode = argNode; + if (actualArgNode->gtOper == GT_PUTARG_REG) + { + actualArgNode = actualArgNode->gtOp.gtOp1; + } unsigned originalSize = 0; bool isPromoted = false; LclVarDsc* varDsc = nullptr; - if (argNode->gtOper == GT_LCL_VAR) + if (actualArgNode->gtOper == GT_LCL_VAR) { - varDsc = compiler->lvaTable + argNode->gtLclVarCommon.gtLclNum; + varDsc = compiler->lvaTable + actualArgNode->gtLclVarCommon.gtLclNum; originalSize = varDsc->lvSize(); } - else if (argNode->gtOper == GT_MKREFANY) + else if (actualArgNode->gtOper == GT_MKREFANY) { originalSize = 2 * TARGET_POINTER_SIZE; } - else if (argNode->gtOper == GT_LDOBJ) + else if (actualArgNode->gtOper == GT_LDOBJ) { - noway_assert(!"GT_LDOBJ not supported for arm64"); + CORINFO_CLASS_HANDLE ldObjClass = actualArgNode->gtLdObj.gtClass; + originalSize = compiler->info.compCompHnd->getClassSize(ldObjClass); } else { @@ -687,11 +672,9 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } argNode->gtLsraInfo.internalIntCount = internalIntCount; } - else - { - argNode->gtLsraInfo.setDstCandidates(l, argMask); - argNode->gtLsraInfo.setSrcCandidates(l, argMask); - } + + argNode->gtLsraInfo.setDstCandidates(l, argMask); + argNode->gtLsraInfo.setSrcCandidates(l, argMask); // To avoid redundant moves, have the argument child tree computed in the // register in which the argument is passed to the call. @@ -754,185 +737,9 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) break; case GT_INITBLK: - { - // Sources are dest address, initVal and size - info->srcCount = 3; - info->dstCount = 0; - - GenTreeInitBlk* initBlkNode = tree->AsInitBlk(); - - GenTreePtr blockSize = initBlkNode->Size(); - GenTreePtr dstAddr = initBlkNode->Dest(); - GenTreePtr initVal = initBlkNode->InitVal(); - - // TODO-ARM64-CQ: Currently we generate a helper call for every - // initblk we encounter. Later on we should implement loop unrolling - // code sequences to improve CQ. - // For reference see the code in LowerXArch.cpp. - -#if 0 - // If we have an InitBlk with constant block size we can speed this up by unrolling the loop. - if (blockSize->IsCnsIntOrI() && - blockSize->gtIntCon.gtIconVal <= INITBLK_UNROLL_LIMIT && - && initVal->IsCnsIntOrI()) - { - ssize_t size = blockSize->gtIntCon.gtIconVal; - // Replace the integer constant in initVal - // to fill an 8-byte word with the fill value of the InitBlk - assert(initVal->gtIntCon.gtIconVal == (initVal->gtIntCon.gtIconVal & 0xFF)); - if (size < REGSIZE_BYTES) - { - initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal; - } - else - { - initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * initVal->gtIntCon.gtIconVal; - initVal->gtType = TYP_LONG; - } - - MakeSrcContained(tree, blockSize); - - // In case we have a buffer >= 16 bytes - // we can use SSE2 to do a 128-bit store in a single - // instruction. - if (size >= XMM_REGSIZE_BYTES) - { - // Reserve an XMM register to fill it with - // a pack of 16 init value constants. 
- info->internalFloatCount = 1; - info->setInternalCandidates(l, l->internalFloatRegCandidates()); - } - initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; - } - } - else -#endif // 0 - { - // The helper follows the regular AMD64 ABI. - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindHelper; - } - } - break; - - case GT_COPYOBJ: - { - // Sources are src, dest and size (or class token for CpObj). - info->srcCount = 3; - info->dstCount = 0; - - GenTreeCpObj* cpObjNode = tree->AsCpObj(); - - GenTreePtr clsTok = cpObjNode->ClsTok(); - GenTreePtr dstAddr = cpObjNode->Dest(); - GenTreePtr srcAddr = cpObjNode->Source(); - - unsigned slots = cpObjNode->gtSlots; - -#ifdef DEBUG - // CpObj must always have at least one GC-Pointer as a member. - assert(cpObjNode->gtGcPtrCount > 0); - - assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); - assert(clsTok->IsIconHandle()); - - CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)clsTok->gtIntCon.gtIconVal; - size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd); - size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); - - // Currently, the EE always round up a class data structure so - // we are not handling the case where we have a non multiple of pointer sized - // struct. This behavior may change in the future so in order to keeps things correct - // let's assert it just to be safe. Going forward we should simply - // handle this case. - assert(classSize == blkSize); - assert((blkSize / TARGET_POINTER_SIZE) == slots); - assert((cpObjNode->gtFlags & GTF_BLK_HASGCPTR) != 0); -#endif - - // We don't need to materialize the struct size but we still need - // a temporary register to perform the sequence of loads and stores. - MakeSrcContained(tree, clsTok); - info->internalIntCount = 1; - - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); - srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); - } - break; - case GT_COPYBLK: - { - // Sources are src, dest and size (or class token for CpObj). - info->srcCount = 3; - info->dstCount = 0; - - GenTreeCpBlk* cpBlkNode = tree->AsCpBlk(); - - GenTreePtr blockSize = cpBlkNode->Size(); - GenTreePtr dstAddr = cpBlkNode->Dest(); - GenTreePtr srcAddr = cpBlkNode->Source(); - - // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size - // we should unroll the loop to improve CQ. - - // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented. -#if 0 - if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT) - { - assert(!blockSize->IsIconHandle()); - ssize_t size = blockSize->gtIntCon.gtIconVal; - - // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. - // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of - // our framework assemblies, so this is the main code generation scheme we'll use. - if ((size & (XMM_REGSIZE_BYTES - 1)) != 0) - { - info->internalIntCount++; - info->addInternalCandidates(l, l->allRegs(TYP_INT)); - } - - if (size >= XMM_REGSIZE_BYTES) - { - // If we have a buffer larger than XMM_REGSIZE_BYTES, - // reserve an XMM register to use it for a - // series of 16-byte loads and stores. 
- info->internalFloatCount = 1; - info->addInternalCandidates(l, l->internalFloatRegCandidates()); - } - - // If src or dst are on stack, we don't have to generate the address into a register - // because it's just some constant+SP - if (srcAddr->OperIsLocalAddr()) - { - MakeSrcContained(tree, srcAddr); - } - - if (dstAddr->OperIsLocalAddr()) - { - MakeSrcContained(tree, dstAddr); - } - - cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; - } - else -#endif // 0 - { - // In case we have a constant integer this means we went beyond - // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of - // any GC-Pointers in the src struct. - if (blockSize->IsCnsIntOrI()) - { - assert(!blockSize->IsIconHandle()); - } - - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindHelper; - } - } + case GT_COPYOBJ: + TreeNodeInfoInitBlockStore(tree->AsBlkOp()); break; case GT_LCLHEAP: @@ -1072,6 +879,13 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) case GT_ARR_INDEX: info->srcCount = 2; info->dstCount = 1; + + // We need one internal register when generating code for GT_ARR_INDEX, however the + // register allocator always may just give us the same one as it gives us for the 'dst' + // as a workaround we will just ask for two internal registers. + // + info->internalIntCount = 2; + // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple // times while the result is being computed. tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true; @@ -1084,6 +898,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) info->srcCount = 3; info->dstCount = 1; info->internalIntCount = 1; + // we don't want to generate code for this if (tree->gtArrOffs.gtOffset->IsZero()) { @@ -1092,19 +907,39 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) break; case GT_LEA: - // The LEA usually passes its operands through to the GT_IND, in which case we'll - // clear the info->srcCount and info->dstCount later, but we may be instantiating an address, - // so we set them here. - info->srcCount = 0; - if (tree->AsAddrMode()->Base() != nullptr) - { - info->srcCount++; - } - if (tree->AsAddrMode()->Index() != nullptr) { - info->srcCount++; + GenTreeAddrMode* lea = tree->AsAddrMode(); + + GenTree* base = lea->Base(); + GenTree* index = lea->Index(); + unsigned cns = lea->gtOffset; + + // This LEA is instantiating an address, + // so we set up the srcCount and dstCount here. 
+ info->srcCount = 0; + if (base != nullptr) + { + info->srcCount++; + } + if (index != nullptr) + { + info->srcCount++; + } + info->dstCount = 1; + + // On ARM64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // ARM64 does not support both Index and offset so we need an internal register + info->internalIntCount = 1; + } + else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE)) + { + // This offset can't be contained in the add instruction, so we need an internal register + info->internalIntCount = 1; + } } - info->dstCount = 1; break; case GT_STOREIND: @@ -1124,26 +959,22 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) MakeSrcContained(tree, src); } - GenTreePtr addr = tree->gtOp.gtOp1; - - HandleIndirAddressExpression(tree, addr); + SetIndirAddrOpCounts(tree); } break; case GT_NULLCHECK: + info->dstCount = 0; + info->srcCount = 1; info->isLocalDefUse = true; - - __fallthrough; + // null check is an indirection on an addr + SetIndirAddrOpCounts(tree); + break; case GT_IND: - { - info->dstCount = tree->OperGet() == GT_NULLCHECK ? 0 : 1; - info->srcCount = 1; - - GenTreePtr addr = tree->gtOp.gtOp1; - - HandleIndirAddressExpression(tree, addr); - } + info->dstCount = 1; + info->srcCount = 1; + SetIndirAddrOpCounts(tree); break; case GT_CATCH_ARG: @@ -1172,6 +1003,193 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } } +//------------------------------------------------------------------------ +// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. +// +// Notes: + +void +Lowering::TreeNodeInfoInitBlockStore(GenTreeBlkOp* blkNode) +{ + GenTree* dstAddr = blkNode->Dest(); + unsigned size; + LinearScan* l = m_lsra; + Compiler* compiler = comp; + + // Sources are dest address, initVal or source, and size + blkNode->gtLsraInfo.srcCount = 3; + blkNode->gtLsraInfo.dstCount = 0; + + if (blkNode->OperGet() == GT_INITBLK) + { + GenTreeInitBlk* initBlkNode = blkNode->AsInitBlk(); + + GenTreePtr blockSize = initBlkNode->Size(); + GenTreePtr initVal = initBlkNode->InitVal(); + + // TODO-ARM64-CQ: Currently we generate a helper call for every + // initblk we encounter. Later on we should implement loop unrolling + // code sequences to improve CQ. + // For reference see the code in LowerXArch.cpp. + +#if 0 + // If we have an InitBlk with constant block size we can speed this up by unrolling the loop. + if (blockSize->IsCnsIntOrI() && + blockSize->gtIntCon.gtIconVal <= INITBLK_UNROLL_LIMIT && + && initVal->IsCnsIntOrI()) + { + ssize_t size = blockSize->gtIntCon.gtIconVal; + // Replace the integer constant in initVal + // to fill an 8-byte word with the fill value of the InitBlk + assert(initVal->gtIntCon.gtIconVal == (initVal->gtIntCon.gtIconVal & 0xFF)); + if (size < REGSIZE_BYTES) + { + initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal; + } + else + { + initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * initVal->gtIntCon.gtIconVal; + initVal->gtType = TYP_LONG; + } + + MakeSrcContained(blkNode, blockSize); + + // In case we have a buffer >= 16 bytes + // we can use SSE2 to do a 128-bit store in a single + // instruction. + if (size >= XMM_REGSIZE_BYTES) + { + // Reserve an XMM register to fill it with + // a pack of 16 init value constants. 
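ARM64 load/store addressing can combine a base with either a register index or an immediate offset, but not both at once, which is why the LEA handling above reserves an internal register when an index and a non-zero offset coexist, or when the offset cannot be encoded in an add. A hedged sketch of that decision; the encodability test is a simplified stand-in for emitter::emitIns_valid_imm_for_add:

    #include <cstdint>

    // Simplified stand-in: ARM64 add accepts a 12-bit immediate, optionally shifted by 12.
    bool FitsInAddImmediate(int64_t offset)
    {
        return (offset >= 0) &&
               ((offset < (1 << 12)) || (((offset & 0xFFF) == 0) && (offset < (1 << 24))));
    }

    // Number of temporary registers needed to form [base + index + offset].
    int InternalRegsForAddrMode(bool hasIndex, int64_t offset)
    {
        if (hasIndex && (offset != 0))
            return 1;   // e.g. add tmp, base, #offset ; ldr x0, [tmp, index]
        if (!FitsInAddImmediate(offset))
            return 1;   // offset must be materialized: mov tmp, #offset
        return 0;
    }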
+ blkNode->gtLsraInfo.internalFloatCount = 1; + blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates()); + } + initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; + } + } + else +#endif // 0 + { + // The helper follows the regular AMD64 ABI. + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); + initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); + initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindHelper; + } + } + else if (blkNode->OperGet() == GT_COPYOBJ) + { + GenTreeCpObj* cpObjNode = blkNode->AsCpObj(); + + GenTreePtr clsTok = cpObjNode->ClsTok(); + GenTreePtr srcAddr = cpObjNode->Source(); + + unsigned slots = cpObjNode->gtSlots; + +#ifdef DEBUG + // CpObj must always have at least one GC-Pointer as a member. + assert(cpObjNode->gtGcPtrCount > 0); + + assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); + assert(clsTok->IsIconHandle()); + + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)clsTok->gtIntCon.gtIconVal; + size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd); + size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); + + // Currently, the EE always round up a class data structure so + // we are not handling the case where we have a non multiple of pointer sized + // struct. This behavior may change in the future so in order to keeps things correct + // let's assert it just to be safe. Going forward we should simply + // handle this case. + assert(classSize == blkSize); + assert((blkSize / TARGET_POINTER_SIZE) == slots); + assert((cpObjNode->gtFlags & GTF_BLK_HASGCPTR) != 0); +#endif + + // We don't need to materialize the struct size but we still need + // a temporary register to perform the sequence of loads and stores. + MakeSrcContained(blkNode, clsTok); + blkNode->gtLsraInfo.internalIntCount = 1; + + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); + srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); + } + else + { + assert(blkNode->OperGet() == GT_COPYBLK); + GenTreeCpBlk* cpBlkNode = blkNode->AsCpBlk(); + + GenTreePtr blockSize = cpBlkNode->Size(); + GenTreePtr srcAddr = cpBlkNode->Source(); + + // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size + // we should unroll the loop to improve CQ. + + // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented. +#if 0 + if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT) + { + assert(!blockSize->IsIconHandle()); + ssize_t size = blockSize->gtIntCon.gtIconVal; + + // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. + // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of + // our framework assemblies, so this is the main code generation scheme we'll use. + if ((size & (XMM_REGSIZE_BYTES - 1)) != 0) + { + blkNode->gtLsraInfo.internalIntCount++; + blkNode->gtLsraInfo.addInternalCandidates(l, l->allRegs(TYP_INT)); + } + + if (size >= XMM_REGSIZE_BYTES) + { + // If we have a buffer larger than XMM_REGSIZE_BYTES, + // reserve an XMM register to use it for a + // series of 16-byte loads and stores. 
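When the block size is not a known small constant, the lowering above pins the three operands to the first argument registers and defers to a helper whose observable effect is essentially memset/memcpy. A rough equivalent of what the helper path computes at run time; the function names are illustrative, not the actual JIT helper entry points:

    #include <cstring>
    #include <cstddef>

    // InitBlk helper path: dst in ARG_0, fill value in ARG_1, byte count in ARG_2.
    void InitBlockViaHelper(void* dst, int fillByte, size_t size)
    {
        std::memset(dst, fillByte, size);
    }

    // CpBlk helper path: dst in ARG_0, src in ARG_1, byte count in ARG_2.
    // The source is asserted to contain no GC pointers before this path is taken.
    void CopyBlockViaHelper(void* dst, const void* src, size_t size)
    {
        std::memcpy(dst, src, size);
    }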
+ blkNode->gtLsraInfo.internalFloatCount = 1; + blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates()); + } + + // If src or dst are on stack, we don't have to generate the address into a register + // because it's just some constant+SP + if (srcAddr->OperIsLocalAddr()) + { + MakeSrcContained(blkNode, srcAddr); + } + + if (dstAddr->OperIsLocalAddr()) + { + MakeSrcContained(blkNode, dstAddr); + } + + cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; + } + else +#endif // 0 + { + // In case we have a constant integer this means we went beyond + // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of + // any GC-Pointers in the src struct. + if (blockSize->IsCnsIntOrI()) + { + assert(!blockSize->IsIconHandle()); + } + + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); + srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); + cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindHelper; + } + } +} + #ifdef FEATURE_SIMD //------------------------------------------------------------------------ // TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree. @@ -1422,21 +1440,32 @@ void Lowering::LowerGCWriteBarrier(GenTree* tree) assert(src->gtLsraInfo.dstCount == 1); } - -void Lowering::HandleIndirAddressExpression(GenTree* indirTree, GenTree* addr) +//----------------------------------------------------------------------------------------- +// Specify register requirements for address expression of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND or GT_NULLCHECK gentree node +// +void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) { - GenTree* base = nullptr; - GenTree* index = nullptr; - unsigned mul, cns; - bool rev; - bool modifiedSources = false; + assert(indirTree->OperIsIndir()); + + GenTreePtr addr = indirTree->gtGetOp1(); TreeNodeInfo* info = &(indirTree->gtLsraInfo); + GenTreePtr base = nullptr; + GenTreePtr index = nullptr; + unsigned cns = 0; + unsigned mul; + bool rev; + bool modifiedSources = false; + if (addr->OperGet() == GT_LEA) { GenTreeAddrMode* lea = addr->AsAddrMode(); base = lea->Base(); index = lea->Index(); + cns = lea->gtOffset; m_lsra->clearOperandCounts(addr); // The srcCount is decremented because addr is now "contained", @@ -1444,7 +1473,7 @@ void Lowering::HandleIndirAddressExpression(GenTree* indirTree, GenTree* addr) info->srcCount--; } else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) - && !(modifiedSources = AreSourcesPossiblyModified(indirTree, base, index))) + && !(modifiedSources = AreSourcesPossiblyModified(indirTree, base, index))) { // An addressing mode will be constructed that may cause some // nodes to not need a register, and cause others' lifetimes to be extended @@ -1542,6 +1571,19 @@ void Lowering::HandleIndirAddressExpression(GenTree* indirTree, GenTree* addr) { info->srcCount++; } + + // On ARM64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // ARM64 does not support both Index and offset so we need an internal register + info->internalIntCount = 1; + } + else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) + { + // This offset can't be contained in the ldr/str instruction, so we need an internal register + info->internalIntCount = 1; + } } @@ -1630,7 +1672,7 @@ void Lowering::LowerRotate(GenTreePtr 
tree) { if (tree->OperGet() == GT_ROL) { - // There is no ROL instruction on ARM. Convert rol into ROR. + // There is no ROL instruction on ARM. Convert ROL into ROR. GenTreePtr rotatedValue = tree->gtOp.gtOp1; unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2; @@ -1651,42 +1693,6 @@ void Lowering::LowerRotate(GenTreePtr tree) } } -// TODO-Cleanup: move to Lower.cpp? -void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate) -{ - GenTreePtr indirDst = storeInd->gtGetOp1(); - GenTreePtr indirSrc = storeInd->gtGetOp2(); - TreeNodeInfo* info = &(storeInd->gtLsraInfo); - - info->dstCount = 0; - - m_lsra->clearOperandCounts(indirSrc); - m_lsra->clearOperandCounts(indirCandidate); - GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1(); - if (indirCandidateChild->OperGet() == GT_LEA) - { - GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode(); - assert(addrMode->Base()->OperIsLeaf()); - m_lsra->clearOperandCounts(addrMode->Base()); - info->srcCount++; - - if (addrMode->Index() != nullptr) - { - assert(addrMode->Index()->OperIsLeaf()); - m_lsra->clearOperandCounts(addrMode->Index()); - info->srcCount++; - } - - m_lsra->clearOperandCounts(indirDst); - } - else - { - assert(indirCandidateChild->OperGet() == GT_LCL_VAR); - info->srcCount += indirCandidateChild->gtLsraInfo.dstCount; - } - m_lsra->clearOperandCounts(indirCandidateChild); -} - // returns true if the tree can use the read-modify-write memory instruction form bool Lowering::isRMWRegOper(GenTreePtr tree) { diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 4c1dc1a957..14412b7f66 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -29,6 +29,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "jit.h" #include "lower.h" +// xarch supports both ROL and ROR instructions so no lowering is required. 
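ARM has no rotate-left instruction, so the ARM lowering above rewrites a rotate-left by n into a rotate-right by (bit-width − n), while xarch supports both forms directly and needs no lowering. A self-contained check of that identity:

    #include <cstdint>
    #include <cassert>

    uint32_t RotateLeft32(uint32_t value, unsigned n)
    {
        n &= 31;
        return (value << n) | (value >> ((32 - n) & 31));
    }

    uint32_t RotateRight32(uint32_t value, unsigned n)
    {
        n &= 31;
        return (value >> n) | (value << ((32 - n) & 31));
    }

    int main()
    {
        // rol x, n  ==  ror x, (32 - n), which is what LowerRotate emits on ARM.
        assert(RotateLeft32(0x80000001u, 5) == RotateRight32(0x80000001u, 32 - 5));
        return 0;
    }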
+void Lowering::LowerRotate(GenTreePtr tree) +{ +} + // there is not much lowering to do with storing a local but // we do some handling of contained immediates and widening operations of unsigneds void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) @@ -67,9 +72,14 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) unsigned varNum = storeLoc->gtLclNum; LclVarDsc* varDsc = comp->lvaTable + varNum; + if (varDsc->lvIsSIMDType()) + { + noway_assert(storeLoc->gtType != TYP_STRUCT); + } + unsigned size = genTypeSize(storeLoc); // If we are storing a constant into a local variable // we extend the size of the store here - if (genTypeSize(storeLoc) < 4) + if ((size < 4) && !varTypeIsStruct(varDsc)) { if (!varTypeIsUnsigned(varDsc)) { @@ -413,11 +423,25 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) info->srcCount = 2; info->dstCount = 1; + op1 = tree->gtOp.gtOp1; op2 = tree->gtOp.gtOp2; if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl()) { MakeSrcContained(tree, op2); } + else if (tree->OperIsCommutative() && + (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1)))) + { + // Though we have GT_ADD(op1=memOp, op2=non-memOp, we try to reorder the operands + // as long as it is safe so that the following efficient code sequence is generated: + // addss/sd targetReg, memOp (if op1Reg == targetReg) OR + // movaps targetReg, op2Reg; addss/sd targetReg, [memOp] + // + // Instead of + // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR + // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg + MakeSrcContained(tree, op1); + } break; } @@ -430,29 +454,49 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) // We're not marking a constant hanging on the left of the add // as containable so we assign it to a register having CQ impact. // TODO-XArch-CQ: Detect this case and support both generating a single instruction - // for GT_ADD(Constant, SomeTree) and GT_ADD(SomeTree, Constant) + // for GT_ADD(Constant, SomeTree) info->srcCount = 2; info->dstCount = 1; - op2 = tree->gtOp.gtOp2; + op1 = tree->gtOp.gtOp1; + op2 = tree->gtOp.gtOp2; - // We can directly encode the second operand if it is either a containable constant or a local field. - // In case of local field, we can encode it directly provided its type matches with 'tree' type. + // We can directly encode the second operand if it is either a containable constant or a memory-op. + // In case of memory-op, we can encode it directly provided its type matches with 'tree' type. // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types // do not match, they get normalized (i.e. sign/zero extended) on load into a register. 
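The commutative reordering above exists so that a memory operand can be folded directly into the SSE arithmetic instruction instead of being loaded first. Illustrative instruction shapes in comments, plus a trivial function whose float add is the kind of tree this applies to; the exact register choices are hypothetical:

    // Preferred (memory operand folded into the arithmetic instruction):
    //     movaps xmm0, xmm1              ; copy the register operand if needed
    //     addss  xmm0, dword ptr [mem]
    //
    // Avoided (extra load and extra register pressure):
    //     movss  xmm2, dword ptr [mem]
    //     addss  xmm0, xmm2
    //
    float AddFromMemory(const float* p, float x)
    {
        return *p + x;   // addss xmm, [mem] once the memory operand is contained
    }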
bool directlyEncodable = false; + GenTreePtr operand = nullptr; + if (IsContainableImmed(tree, op2)) { directlyEncodable = true; + operand = op2; } - else if ((tree->gtOp.gtOp1->gtOper != GT_IND) && op2->isLclField() && tree->TypeGet() == op2->TypeGet()) + else if (!IsBinOpInRMWStoreInd(tree)) { - directlyEncodable = true; + if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet()) + { + directlyEncodable = true; + operand = op2; + } + else if (tree->OperIsCommutative()) + { + if(IsContainableImmed(tree, op1) || + (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)) + ) + { + // If it is safe, we can reverse the order of operands of commutative operations for efficient codegen + directlyEncodable = true; + operand = op1; + + } + } } if (directlyEncodable) { - l->clearDstCount(op2); - info->srcCount = 1; + assert(operand != nullptr); + MakeSrcContained(tree, operand); } } break; @@ -561,7 +605,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) SetMulOpCounts(tree); break; - case GT_MATH: + case GT_INTRINSIC: { // Both operand and its result must be of floating point type. op1 = tree->gtOp.gtOp1; @@ -571,7 +615,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) info->srcCount = 1; info->dstCount = 1; - switch(tree->gtMath.gtMathFN) + switch(tree->gtIntrinsic.gtIntrinsicId) { case CORINFO_INTRINSIC_Sqrt: if (op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl()) @@ -593,7 +637,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) // xmm register. When we add support in emitter to emit 128-bit // data constants and instructions that operate on 128-bit // memory operands we can avoid the need for an internal register. - if (tree->gtMath.gtMathFN == CORINFO_INTRINSIC_Abs) + if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) { info->internalFloatCount = 1; info->setInternalCandidates(l, l->internalFloatRegCandidates()); @@ -1152,196 +1196,11 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) #endif //_TARGET_X86_ case GT_INITBLK: - { - // Sources are dest address, initVal and size - info->srcCount = 3; - info->dstCount = 0; - - GenTreeInitBlk* initBlkNode = tree->AsInitBlk(); - - GenTreePtr blockSize = initBlkNode->Size(); - GenTreePtr dstAddr = initBlkNode->Dest(); - GenTreePtr initVal = initBlkNode->InitVal(); - - // If we have an InitBlk with constant block size we can optimize several ways: - // a) If the size is smaller than a small memory page but larger than INITBLK_UNROLL_LIMIT bytes - // we use rep stosb since this reduces the register pressure in LSRA and we have - // roughly the same performance as calling the helper. - // b) If the size is <= INITBLK_UNROLL_LIMIT bytes and the fill byte is a constant, - // we can speed this up by unrolling the loop using SSE2 stores. The reason for - // this threshold is because our last investigation (Fall 2013), more than 95% of initblks - // in our framework assemblies are actually <= INITBLK_UNROLL_LIMIT bytes size, so this is the - // preferred code sequence for the vast majority of cases. - - // This threshold will decide from using the helper or let the JIT decide to inline - // a code sequence of its choice. - ssize_t helperThreshold = max(INITBLK_STOS_LIMIT, INITBLK_UNROLL_LIMIT); - - if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= helperThreshold) - { - ssize_t size = blockSize->gtIntCon.gtIconVal; - - // Always favor unrolling vs rep stos. 
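The removed x64 InitBlk handling (reintroduced below as part of TreeNodeInfoInitBlockStore) picks one of three code-generation strategies from the constant size. A compact sketch of that decision; the threshold constants here are illustrative values, not the JIT's actual INITBLK_UNROLL_LIMIT / INITBLK_STOS_LIMIT:

    #include <cstddef>
    #include <algorithm>

    enum class BlkOpKind { Unroll, RepInstr, Helper };

    constexpr ptrdiff_t kUnrollLimit = 128;   // assumption
    constexpr ptrdiff_t kStosLimit   = 256;   // assumption

    BlkOpKind ChooseInitBlkStrategy(bool sizeIsConstant, ptrdiff_t size, bool fillIsConstant)
    {
        const ptrdiff_t helperThreshold = std::max(kStosLimit, kUnrollLimit);
        if (sizeIsConstant && (size <= helperThreshold))
        {
            // Always favor unrolling over rep stos when both are possible.
            if ((size <= kUnrollLimit) && fillIsConstant)
                return BlkOpKind::Unroll;    // SSE2 stores, no loop
            return BlkOpKind::RepInstr;      // rep stosb with RDI/RAX/RCX pinned
        }
        return BlkOpKind::Helper;            // regular ABI helper call
    }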
- if (size <= INITBLK_UNROLL_LIMIT && initVal->IsCnsIntOrI()) - { - // Replace the integer constant in initVal - // to fill an 8-byte word with the fill value of the InitBlk - assert(initVal->gtIntCon.gtIconVal == (initVal->gtIntCon.gtIconVal & 0xFF)); -#ifdef _TARGET_AMD64_ - if (size < REGSIZE_BYTES) - { - initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal; - } - else - { - initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * initVal->gtIntCon.gtIconVal; - initVal->gtType = TYP_LONG; - } -#else // !_TARGET_AMD64_ - initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal; -#endif // !_TARGET_AMD64_ - - MakeSrcContained(tree, blockSize); - - // In case we have a buffer >= 16 bytes - // we can use SSE2 to do a 128-bit store in a single - // instruction. - if (size >= XMM_REGSIZE_BYTES) - { - // Reserve an XMM register to fill it with - // a pack of 16 init value constants. - info->internalFloatCount = 1; - info->setInternalCandidates(l, l->internalFloatRegCandidates()); - } - initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; - } - else - { - // rep stos has the following register requirements: - // a) The memory address to be in RDI. - // b) The fill value has to be in RAX. - // c) The buffer size must be in RCX. - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_RDI); - initVal->gtLsraInfo.setSrcCandidates(l, RBM_RAX); - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_RCX); - initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindRepInstr; - } - } - else - { -#ifdef _TARGET_AMD64_ - // The helper follows the regular AMD64 ABI. - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindHelper; -#else // !_TARGET_AMD64_ - NYI("InitBlk helper call for RyuJIT/x86"); -#endif // !_TARGET_AMD64_ - } - break; - } - + case GT_COPYBLK: case GT_COPYOBJ: - { - // Sources are src, dest and size (or class token for CpObj). - info->srcCount = 3; - info->dstCount = 0; - - GenTreeCpObj* cpObjNode = tree->AsCpObj(); - - GenTreePtr clsTok = cpObjNode->ClsTok(); - GenTreePtr dstAddr = cpObjNode->Dest(); - GenTreePtr srcAddr = cpObjNode->Source(); - - unsigned slots = cpObjNode->gtSlots; - -#ifdef DEBUG - // CpObj must always have at least one GC-Pointer as a member. - assert(cpObjNode->gtGcPtrCount > 0); - - assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); - assert(clsTok->IsIconHandle()); - - CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)clsTok->gtIntCon.gtIconVal; - size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd); - size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); - - // Currently, the EE always round up a class data structure so - // we are not handling the case where we have a non multiple of pointer sized - // struct. This behavior may change in the future so in order to keeps things correct - // let's assert it just to be safe. Going forward we should simply - // handle this case. - assert(classSize == blkSize); - assert((blkSize / TARGET_POINTER_SIZE) == slots); - assert((cpObjNode->gtFlags & GTF_BLK_HASGCPTR) != 0); -#endif - - bool IsRepMovsProfitable = false; - - // If the destination is not on the stack, let's find out if we - // can improve code size by using rep movsq instead of generating - // sequences of movsq instructions. 
- if (!dstAddr->OperIsLocalAddr()) - { - // Let's inspect the struct/class layout and determine if it's profitable - // to use rep movsq for copying non-gc memory instead of using single movsq - // instructions for each memory slot. - unsigned i = 0; - BYTE* gcPtrs = cpObjNode->gtGcPtrs; - - do { - unsigned nonGCSlots = 0; - // Measure a contiguous non-gc area inside the struct and note the maximum. - while (i < slots && gcPtrs[i] == TYPE_GC_NONE) - { - nonGCSlots++; - i++; - } - - while (i < slots && gcPtrs[i] != TYPE_GC_NONE) - { - i++; - } - - if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT) - { - IsRepMovsProfitable = true; - break; - } - } while (i < slots); - } - else if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) - { - IsRepMovsProfitable = true; - } + TreeNodeInfoInitBlockStore(tree->AsBlkOp()); + break; - // There are two cases in which we need to materialize the - // struct size: - // a) When the destination is on the stack we don't need to use the - // write barrier, we can just simply call rep movsq and get a win in codesize. - // b) If we determine we have contiguous non-gc regions in the struct where it's profitable - // to use rep movsq instead of a sequence of single movsq instructions. According to the - // Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where - // the entire operation takes 20 cycles and encodes in 5 bytes (moving RCX, and calling rep movsq). - if (IsRepMovsProfitable) - { - // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq. - MakeSrcContained(tree, clsTok); - info->internalIntCount = 1; - info->setInternalCandidates(l, RBM_RCX); - } - else - { - // We don't need to materialize the struct size because we will unroll - // the loop using movsq that automatically increments the pointers. - MakeSrcContained(tree, clsTok); - } - - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_RDI); - srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_RSI); - } - break; #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING case GT_PUTARG_STK: @@ -1353,12 +1212,12 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk(); - + GenTreePtr dstAddr = tree; GenTreePtr srcAddr = tree->gtOp.gtOp1; assert(srcAddr->OperGet() == GT_LDOBJ); - info->srcCount = srcAddr->gtLsraInfo.dstCount; + info->srcCount = srcAddr->gtLsraInfo.dstCount; // If this is a stack variable address, // make the op1 contained, so this way @@ -1369,23 +1228,23 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) { info->srcCount += 1; } - + info->dstCount = 0; - + // In case of a CpBlk we could use a helper call. In case of putarg_stk we // can't do that since the helper call could kill some already set up outgoing args. // TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj. // The cpyXXXX code is rather complex and this could cause it to be more complex, but // it might be the right thing to do. - + // This threshold will decide from using the helper or let the JIT decide to inline // a code sequence of its choice. ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT); ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE; - + // TODO-X86-CQ: The helper call either is not supported on x86 or required more work // (I don't know which). - + // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. 
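The profitability check above (moved below into the shared block-store path) scans the struct's GC-pointer layout for a long enough run of non-GC slots before committing to rep movsq. A standalone version of that scan over a byte array of slot kinds; the limit constant is a stand-in for CPOBJ_NONGC_SLOTS_LIMIT:

    constexpr unsigned char kTypeGcNone    = 0;   // stand-in for TYPE_GC_NONE
    constexpr unsigned     kNonGcSlotLimit = 4;   // stand-in value (assumption)

    // True if the layout contains a contiguous run of at least kNonGcSlotLimit
    // non-GC slots, making rep movsq worthwhile for that region.
    bool HasProfitableNonGcRun(const unsigned char* gcPtrs, unsigned slots)
    {
        unsigned i = 0;
        while (i < slots)
        {
            unsigned run = 0;
            while ((i < slots) && (gcPtrs[i] == kTypeGcNone)) { ++run; ++i; }
            if (run >= kNonGcSlotLimit)
                return true;
            while ((i < slots) && (gcPtrs[i] != kTypeGcNone)) { ++i; }
        }
        return false;
    }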
// Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of // our framework assemblies, so this is the main code generation scheme we'll use. @@ -1400,7 +1259,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) { info->internalIntCount++; regMaskTP regMask = l->allRegs(TYP_INT); - + #ifdef _TARGET_X86_ if ((size % 2) != 0) { @@ -1409,7 +1268,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) #endif info->setInternalCandidates(l, regMask); } - + if (size >= XMM_REGSIZE_BYTES) { // If we have a buffer larger than XMM_REGSIZE_BYTES, @@ -1418,12 +1277,12 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) info->internalFloatCount = 1; info->addInternalCandidates(l, l->internalFloatRegCandidates()); } - + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) { MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1()); } - + // If src or dst are on stack, we don't have to generate the address into a register // because it's just some constant+SP putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll; @@ -1436,127 +1295,22 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) { MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1()); } - + putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr; } - + // Always mark the LDOBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree. MakeSrcContained(putArgStkTree, srcAddr); - + // Balance up the inc above. if (srcAddr->gtGetOp1()->OperIsLocalAddr()) { info->srcCount -= 1; } } - - break; -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - case GT_COPYBLK: - { - // Sources are src, dest and size (or class token for CpObj). - info->srcCount = 3; - info->dstCount = 0; - - GenTreeCpBlk* cpBlkNode = tree->AsCpBlk(); - - GenTreePtr blockSize = cpBlkNode->Size(); - GenTreePtr dstAddr = cpBlkNode->Dest(); - GenTreePtr srcAddr = cpBlkNode->Source(); - - // In case of a CpBlk with a constant size and less than CPBLK_MOVS_LIMIT size - // we can use rep movs to generate code instead of the helper call. - - // This threshold will decide from using the helper or let the JIT decide to inline - // a code sequence of its choice. - ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT); - - // TODO-X86-CQ: The helper call either is not supported on x86 or required more work - // (I don't know which). -#ifdef _TARGET_AMD64_ - if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= helperThreshold) -#endif // _TARGET_AMD64_ - { - assert(!blockSize->IsIconHandle()); - ssize_t size = blockSize->gtIntCon.gtIconVal; - - // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. - // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of - // our framework assemblies, so this is the main code generation scheme we'll use. - if (size <= CPBLK_UNROLL_LIMIT) - { - MakeSrcContained(tree, blockSize); - - // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg. - // - // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte. - // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude - // RBM_NON_BYTE_REGS from internal candidates. 
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0) - { - info->internalIntCount++; - regMaskTP regMask = l->allRegs(TYP_INT); - -#ifdef _TARGET_X86_ - if ((size % 2) != 0) - { - regMask &= ~RBM_NON_BYTE_REGS; - } -#endif - info->setInternalCandidates(l, regMask); - } - - if (size >= XMM_REGSIZE_BYTES) - { - // If we have a buffer larger than XMM_REGSIZE_BYTES, - // reserve an XMM register to use it for a - // series of 16-byte loads and stores. - info->internalFloatCount = 1; - info->addInternalCandidates(l, l->internalFloatRegCandidates()); - } - - // If src or dst are on stack, we don't have to generate the address into a register - // because it's just some constant+SP - if (srcAddr->OperIsLocalAddr()) - { - MakeSrcContained(tree, srcAddr); - } - - if (dstAddr->OperIsLocalAddr()) - { - MakeSrcContained(tree, dstAddr); - } - - cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; - } - else - { - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_RDI); - srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_RSI); - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_RCX); - cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindRepInstr; - } - } -#ifdef _TARGET_AMD64_ - else - { - // In case we have a constant integer this means we went beyond - // CPBLK_MOVS_LIMIT bytes of size, still we should never have the case of - // any GC-Pointers in the src struct. - if (blockSize->IsCnsIntOrI()) - { - assert(!blockSize->IsIconHandle()); - } - - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindHelper; - } -#endif // _TARGET_AMD64_ - } break; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING case GT_LCLHEAP: { @@ -1665,29 +1419,29 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) // Consumes arrLen & index - has no result info->srcCount = 2; info->dstCount = 0; - - GenTree* intCns = nullptr; - GenTree* other = nullptr; + + GenTreePtr other = nullptr; if (CheckImmedAndMakeContained(tree, node->gtIndex)) { - intCns = node->gtIndex; other = node->gtArrLen; } else if (CheckImmedAndMakeContained(tree, node->gtArrLen)) { - intCns = node->gtArrLen; other = node->gtIndex; } - else + else if (node->gtIndex->isMemoryOp()) { other = node->gtIndex; } + else + { + other = node->gtArrLen; + } - if (other->isMemoryOp()) + if (other->isMemoryOp() && node->gtIndex->TypeGet() == node->gtArrLen->TypeGet()) { MakeSrcContained(tree, other); } - } break; @@ -1725,11 +1479,11 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) // clear the info->srcCount and info->dstCount later, but we may be instantiating an address, // so we set them here. info->srcCount = 0; - if (tree->AsAddrMode()->Base() != nullptr) + if (tree->AsAddrMode()->HasBase()) { info->srcCount++; } - if (tree->AsAddrMode()->Index() != nullptr) + if (tree->AsAddrMode()->HasIndex()) { info->srcCount++; } @@ -1756,39 +1510,36 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) { MakeSrcContained(tree, src); } + else if (!varTypeIsFloating(tree)) + { + // Perform recognition of trees with the following structure: + // StoreInd(addr, BinOp(expr, GT_IND(addr))) + // to be able to fold this into an instruction of the form + // BINOP [addr], register + // where register is the actual place where 'expr' is computed. + // + // SSE2 doesn't support RMW form of instructions. 
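The pattern described above lets a read-modify-write store collapse into a single instruction that operates directly on memory. Illustrative before/after instruction shapes, plus the C++ statement shape such a tree typically comes from; registers are arbitrary:

    // Tree: StoreInd(addr, Add(Ind(addr), value))
    //
    // Without RMW folding:
    //     mov eax, dword ptr [rcx]
    //     add eax, edx
    //     mov dword ptr [rcx], eax
    //
    // With RMW folding (what the containment above enables):
    //     add dword ptr [rcx], edx
    //
    void AddInPlace(int* p, int value)
    {
        *p += value;   // lowered as storeInd(p, add(ind(p), value))
    }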
+ if (SetStoreIndOpCountsIfRMWMemOp(tree)) + { + break; + } + } - // Perform recognition of trees with the following structure: - // StoreInd(IndA, BinOp(expr, IndA)) - // to be able to fold this into an instruction of the form - // BINOP [addressing mode for IndA], register - // where register is the actual place where 'expr' - // is computed. - // - // SSE2 doesn't support RMW form of instructions. - if (!varTypeIsFloating(tree) && LowerStoreInd(tree)) - break; - - GenTreePtr addr = tree->gtOp.gtOp1; - - HandleIndirAddressExpression(tree, addr); + SetIndirAddrOpCounts(tree); } break; case GT_NULLCHECK: + info->dstCount = 0; + info->srcCount = 1; info->isLocalDefUse = true; - - __fallthrough; + break; case GT_IND: - { - info->dstCount = tree->OperGet() == GT_NULLCHECK ? 0 : 1; - info->srcCount = 1; - - GenTreePtr addr = tree->gtOp.gtOp1; - - HandleIndirAddressExpression(tree, addr); - } - break; + info->dstCount = 1; + info->srcCount = 1; + SetIndirAddrOpCounts(tree); + break; case GT_CATCH_ARG: info->srcCount = 0; @@ -1814,12 +1565,23 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) break; } // end switch (tree->OperGet()) - if (tree->OperIsBinary() && info->srcCount >= 2) + // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1. + // Even then we would like to set isTgtPref on Op1. + if (tree->OperIsBinary() && info->srcCount >= 1) { if (isRMWRegOper(tree)) { GenTree* op1 = tree->gtOp.gtOp1; GenTree* op2 = tree->gtOp.gtOp2; + + // Commutative opers like add/mul/and/or/xor could reverse the order of + // operands if it is safe to do so. In such a case we would like op2 to be + // target preferenced instead of op1. + if (tree->OperIsCommutative() && op1->gtLsraInfo.dstCount == 0 && op2 != nullptr) + { + op1 = op2; + op2 = tree->gtOp.gtOp1; + } // If we have a read-modify-write operation, we want to preference op1 to the target. // If op1 is contained, we don't want to preference it, but it won't @@ -1902,7 +1664,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS); } - if (info->srcCount > 0) + if (tree->OperIsSimple() && (info->srcCount > 0)) { // No need to set src candidates on a contained child operand. GenTree *op = tree->gtOp.gtOp1; @@ -1915,9 +1677,9 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS); } - op = tree->gtOp.gtOp2; - if (op != nullptr) + if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr)) { + op = tree->gtOp.gtOp2; containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0); if (!containedNode) { @@ -1937,6 +1699,317 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } } +//------------------------------------------------------------------------ +// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. 
+// +// Notes: + +void +Lowering::TreeNodeInfoInitBlockStore(GenTreeBlkOp* blkNode) +{ + GenTree* dstAddr = blkNode->Dest(); + unsigned size; + LinearScan* l = m_lsra; + Compiler* compiler = comp; + + // Sources are dest address, initVal or source, and size + blkNode->gtLsraInfo.srcCount = 3; + blkNode->gtLsraInfo.dstCount = 0; + + if (blkNode->OperGet() == GT_INITBLK) + { + GenTreeInitBlk* initBlkNode = blkNode->AsInitBlk(); + + GenTreePtr blockSize = initBlkNode->Size(); + GenTreePtr initVal = initBlkNode->InitVal(); + + // If we have an InitBlk with constant block size we can optimize several ways: + // a) If the size is smaller than a small memory page but larger than INITBLK_UNROLL_LIMIT bytes + // we use rep stosb since this reduces the register pressure in LSRA and we have + // roughly the same performance as calling the helper. + // b) If the size is <= INITBLK_UNROLL_LIMIT bytes and the fill byte is a constant, + // we can speed this up by unrolling the loop using SSE2 stores. The reason for + // this threshold is because our last investigation (Fall 2013), more than 95% of initblks + // in our framework assemblies are actually <= INITBLK_UNROLL_LIMIT bytes size, so this is the + // preferred code sequence for the vast majority of cases. + + // This threshold will decide from using the helper or let the JIT decide to inline + // a code sequence of its choice. + ssize_t helperThreshold = max(INITBLK_STOS_LIMIT, INITBLK_UNROLL_LIMIT); + + // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86 + if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= helperThreshold) + { + ssize_t size = blockSize->gtIntCon.gtIconVal; + + // Always favor unrolling vs rep stos. + if (size <= INITBLK_UNROLL_LIMIT && initVal->IsCnsIntOrI()) + { + // Replace the integer constant in initVal + // to fill an 8-byte word with the fill value of the InitBlk + assert(initVal->gtIntCon.gtIconVal == (initVal->gtIntCon.gtIconVal & 0xFF)); +#ifdef _TARGET_AMD64_ + if (size < REGSIZE_BYTES) + { + initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal; + } + else + { + initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * initVal->gtIntCon.gtIconVal; + initVal->gtType = TYP_LONG; + } +#else // !_TARGET_AMD64_ + initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal; +#endif // !_TARGET_AMD64_ + + MakeSrcContained(blkNode, blockSize); + + // In case we have a buffer >= 16 bytes + // we can use SSE2 to do a 128-bit store in a single + // instruction. + if (size >= XMM_REGSIZE_BYTES) + { + // Reserve an XMM register to fill it with + // a pack of 16 init value constants. + blkNode->gtLsraInfo.internalFloatCount = 1; + blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates()); + } + initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; + } + else + { + // rep stos has the following register requirements: + // a) The memory address to be in RDI. + // b) The fill value has to be in RAX. + // c) The buffer size must be in RCX. + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_RDI); + initVal->gtLsraInfo.setSrcCandidates(l, RBM_RAX); + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_RCX); + initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindRepInstr; + } + } + else + { +#ifdef _TARGET_AMD64_ + // The helper follows the regular AMD64 ABI. 
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); + initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); + initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindHelper; +#else // !_TARGET_AMD64_ + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_RDI); + initVal->gtLsraInfo.setSrcCandidates(l, RBM_RAX); + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_RCX); + initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindRepInstr; +#endif // !_TARGET_AMD64_ + } + } + else if (blkNode->OperGet() == GT_COPYOBJ) + { + GenTreeCpObj* cpObjNode = blkNode->AsCpObj(); + + GenTreePtr clsTok = cpObjNode->ClsTok(); + GenTreePtr srcAddr = cpObjNode->Source(); + + unsigned slots = cpObjNode->gtSlots; + +#ifdef DEBUG + // CpObj must always have at least one GC-Pointer as a member. + assert(cpObjNode->gtGcPtrCount > 0); + + assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); + assert(clsTok->IsIconHandle()); + + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)clsTok->gtIntCon.gtIconVal; + size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd); + size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); + + // Currently, the EE always round up a class data structure so + // we are not handling the case where we have a non multiple of pointer sized + // struct. This behavior may change in the future so in order to keeps things correct + // let's assert it just to be safe. Going forward we should simply + // handle this case. + assert(classSize == blkSize); + assert((blkSize / TARGET_POINTER_SIZE) == slots); + assert((cpObjNode->gtFlags & GTF_BLK_HASGCPTR) != 0); +#endif + + bool IsRepMovsProfitable = false; + + // If the destination is not on the stack, let's find out if we + // can improve code size by using rep movsq instead of generating + // sequences of movsq instructions. + if (!dstAddr->OperIsLocalAddr()) + { + // Let's inspect the struct/class layout and determine if it's profitable + // to use rep movsq for copying non-gc memory instead of using single movsq + // instructions for each memory slot. + unsigned i = 0; + BYTE* gcPtrs = cpObjNode->gtGcPtrs; + + do { + unsigned nonGCSlots = 0; + // Measure a contiguous non-gc area inside the struct and note the maximum. + while (i < slots && gcPtrs[i] == TYPE_GC_NONE) + { + nonGCSlots++; + i++; + } + + while (i < slots && gcPtrs[i] != TYPE_GC_NONE) + { + i++; + } + + if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT) + { + IsRepMovsProfitable = true; + break; + } + } while (i < slots); + } + else if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) + { + IsRepMovsProfitable = true; + } + + // There are two cases in which we need to materialize the + // struct size: + // a) When the destination is on the stack we don't need to use the + // write barrier, we can just simply call rep movsq and get a win in codesize. + // b) If we determine we have contiguous non-gc regions in the struct where it's profitable + // to use rep movsq instead of a sequence of single movsq instructions. According to the + // Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where + // the entire operation takes 20 cycles and encodes in 5 bytes (moving RCX, and calling rep movsq). + if (IsRepMovsProfitable) + { + // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq. 
+ MakeSrcContained(blkNode, clsTok); + blkNode->gtLsraInfo.internalIntCount = 1; + blkNode->gtLsraInfo.setInternalCandidates(l, RBM_RCX); + } + else + { + // We don't need to materialize the struct size because we will unroll + // the loop using movsq that automatically increments the pointers. + MakeSrcContained(blkNode, clsTok); + } + + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_RDI); + srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_RSI); + } + else + { + assert(blkNode->OperGet() == GT_COPYBLK); + GenTreeCpBlk* cpBlkNode = blkNode->AsCpBlk(); + + GenTreePtr blockSize = cpBlkNode->Size(); + GenTreePtr srcAddr = cpBlkNode->Source(); + + // In case of a CpBlk with a constant size and less than CPBLK_MOVS_LIMIT size + // we can use rep movs to generate code instead of the helper call. + + // This threshold will decide from using the helper or let the JIT decide to inline + // a code sequence of its choice. + ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT); + + // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86 + if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= helperThreshold) + { + assert(!blockSize->IsIconHandle()); + ssize_t size = blockSize->gtIntCon.gtIconVal; + + // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. + // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of + // our framework assemblies, so this is the main code generation scheme we'll use. + if (size <= CPBLK_UNROLL_LIMIT) + { + MakeSrcContained(blkNode, blockSize); + + // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg. + // + // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte. + // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude + // RBM_NON_BYTE_REGS from internal candidates. + if ((size & (XMM_REGSIZE_BYTES - 1)) != 0) + { + blkNode->gtLsraInfo.internalIntCount++; + regMaskTP regMask = l->allRegs(TYP_INT); + +#ifdef _TARGET_X86_ + if ((size % 2) != 0) + { + regMask &= ~RBM_NON_BYTE_REGS; + } +#endif + blkNode->gtLsraInfo.setInternalCandidates(l, regMask); + } + + if (size >= XMM_REGSIZE_BYTES) + { + // If we have a buffer larger than XMM_REGSIZE_BYTES, + // reserve an XMM register to use it for a + // series of 16-byte loads and stores. + blkNode->gtLsraInfo.internalFloatCount = 1; + blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates()); + } + + // If src or dst are on stack, we don't have to generate the address into a register + // because it's just some constant+SP + if (srcAddr->OperIsLocalAddr()) + { + MakeSrcContained(blkNode, srcAddr); + } + + if (dstAddr->OperIsLocalAddr()) + { + MakeSrcContained(blkNode, dstAddr); + } + + cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; + } + else + { + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_RDI); + srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_RSI); + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_RCX); + cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindRepInstr; + } + } +#ifdef _TARGET_AMD64_ + else + { + // In case we have a constant integer this means we went beyond + // CPBLK_MOVS_LIMIT bytes of size, still we should never have the case of + // any GC-Pointers in the src struct. 
+ if (blockSize->IsCnsIntOrI()) + { + assert(!blockSize->IsIconHandle()); + } + + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); + srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); + cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindHelper; + } +#elif defined(_TARGET_X86_) + else + { + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_RDI); + srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_RSI); + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_RCX); + cpBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindRepInstr; + } +#endif // _TARGET_X86_ + } +} + #ifdef FEATURE_SIMD //------------------------------------------------------------------------ // TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree. @@ -2180,9 +2253,11 @@ Lowering::TreeNodeInfoInitSIMD(GenTree* tree, LinearScan* lsra) void Lowering::LowerGCWriteBarrier(GenTree* tree) { - GenTreePtr dst = tree; - GenTreePtr addr = tree->gtOp.gtOp1; - GenTreePtr src = tree->gtOp.gtOp2; + assert(tree->OperGet() == GT_STOREIND); + + GenTreeStoreInd* dst = tree->AsStoreInd(); + GenTreePtr addr = dst->Addr(); + GenTreePtr src = dst->Data(); if (addr->OperGet() == GT_LEA) { @@ -2191,12 +2266,12 @@ void Lowering::LowerGCWriteBarrier(GenTree* tree) // lea in a register GenTreeAddrMode* lea = addr->AsAddrMode(); - short leaSrcCount = 0; - if (lea->Base() != nullptr) + int leaSrcCount = 0; + if (lea->HasBase()) { leaSrcCount++; } - if (lea->Index() != nullptr) + if (lea->HasIndex()) { leaSrcCount++; } @@ -2204,37 +2279,69 @@ void Lowering::LowerGCWriteBarrier(GenTree* tree) lea->gtLsraInfo.dstCount = 1; } - // !!! This code was leveraged from codegen.cpp + bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers. + #if NOGC_WRITE_BARRIERS -#ifdef _TARGET_AMD64_ -#error "NOGC_WRITE_BARRIERS is not supported for _TARGET_AMD64" -#else // !_TARGET_AMD64_ - NYI("NYI: NOGC_WRITE_BARRIERS for RyuJIT/x86"); -#endif // !_TARGET_AMD64_ + +#if defined(_TARGET_X86_) + + useOptimizedWriteBarrierHelper = true; // On x86, use the optimized write barriers by default. +#ifdef DEBUG + GCInfo::WriteBarrierForm wbf = comp->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, src); + if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method. + { + useOptimizedWriteBarrierHelper = false; + } +#endif + + if (useOptimizedWriteBarrierHelper) + { + // Special write barrier: + // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and + // op2 (src) goes into any int register. + addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER); + src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_SRC); + } + +#else // !defined(_TARGET_X86_) +#error "NOGC_WRITE_BARRIERS is not supported" +#endif // !defined(_TARGET_X86_) + #endif // NOGC_WRITE_BARRIERS - // For the standard JIT Helper calls - // op1 goes into REG_ARG_0 and - // op2 goes into REG_ARG_1 - // Set this RefPosition, and the previous one, to the physical - // register instead of a virtual one - // - addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0); - src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1); + + if (!useOptimizedWriteBarrierHelper) + { + // For the standard JIT Helper calls: + // op1 (addr) goes into REG_ARG_0 and + // op2 (src) goes into REG_ARG_1 + addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0); + src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1); + } + // Both src and dst must reside in a register, which they should since we haven't set // either of them as contained. 
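The write-barrier lowering above only pins registers; the helper that runs afterwards performs the reference store and records it for the GC. A conceptual sketch of what such a checked barrier does, with a made-up card size and table layout rather than CoreCLR's actual scheme:

    #include <cstdint>
    #include <cstddef>

    extern uint8_t* g_cardTable;        // assumed to be provided by the runtime
    constexpr size_t kCardShift = 11;   // 2KB cards (assumption, for illustration)

    // Store the reference, then mark the card covering 'addr' so the GC can
    // find potential cross-generation pointers during the next ephemeral scan.
    void WriteBarrierSketch(void** addr, void* value)
    {
        *addr = value;
        g_cardTable[reinterpret_cast<uintptr_t>(addr) >> kCardShift] = 0xFF;
    }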
assert(addr->gtLsraInfo.dstCount == 1); assert(src->gtLsraInfo.dstCount == 1); } - -void Lowering::HandleIndirAddressExpression(GenTree* indirTree, GenTree* addr) +//----------------------------------------------------------------------------------------- +// Specify register requirements for address expression of an indirection operation. +// +// Arguments: +// indirTree - GT_IND or GT_STOREIND gentree node +// +void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) { - GenTree* base = nullptr; - GenTree* index = nullptr; + assert(indirTree->isIndir()); + + GenTreePtr addr = indirTree->gtGetOp1(); + TreeNodeInfo* info = &(indirTree->gtLsraInfo); + + GenTreePtr base = nullptr; + GenTreePtr index = nullptr; unsigned mul, cns; bool rev; - bool modifiedSources = false; - TreeNodeInfo* info = &(indirTree->gtLsraInfo); + bool modifiedSources = false; // If indirTree is of TYP_SIMD12, don't mark addr as contained // so that it always get computed to a register. This would @@ -2273,24 +2380,27 @@ void Lowering::HandleIndirAddressExpression(GenTree* indirTree, GenTree* addr) // make this contained, it turns into a constant that goes into an addr mode MakeSrcContained(indirTree, addr); } - - // TODO-XArch-CQ: The below condition is incorrect and need to be revisited for the following reasons: - // a) FitsInAddrBase() already checks for opts.compReloc and - // b) opts.compReloc is set only during Ngen. - // c) During lowering we should not be checking gtRegNum - // For the above reasons this condition will never be true and indir of absolute addresses - // that can be encoded as PC-relative 32-bit offset are never marked as contained. - // - // The right condition to check probably here is - // "addr->IsCnsIntOrI() && comp->codeGen->genAddrShouldUsePCRel(addr->AsIntConCommon()->IconValue())" - // - // Apart from making this change, codegen side changes are needed to handle contained addr - // where GT_IND is possible as an operand. else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp) && - comp->opts.compReloc && - (addr->gtRegNum != REG_NA)) + addr->gtLsraInfo.getDstCandidates(m_lsra) != RBM_VIRTUAL_STUB_PARAM) { + // Amd64: + // We can mark any pc-relative 32-bit addr as containable, except for a direct VSD call address. + // (i.e. those VSD calls for which stub addr is known during JIT compilation time). In this case, + // VM requires us to pass stub addr in REG_VIRTUAL_STUB_PARAM - see LowerVirtualStubCall(). For + // that reason we cannot mark such an addr as contained. Note that this is not an issue for + // indirect VSD calls since morphArgs() is explicitly materializing hidden param as a non-standard + // argument. + // + // Workaround: + // Note that LowerVirtualStubCall() sets addr->gtRegNum to REG_VIRTUAL_STUB_PARAM and Lowering::doPhase() + // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA before calling + // TreeNodeInfoInit(). Ideally we should set a flag on addr nodes that shouldn't be marked as contained + // (in LowerVirtualStubCall()), but we don't have any GTF_* flags left for that purpose. As a workaround + // an explicit check is made here. + // + // TODO-x86: Right now lowering of virst stub dispatch call is a NYI. Once it is implemented, we should + // take a relook at this. 
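Containing a constant address here is what enables the smaller RIP-relative and absolute encodings mentioned in the commit message: the address folds into the memory operand instead of being loaded into a register first. Illustrative shapes; the addresses and registers are made up:

    // Address materialized (constant NOT contained):
    //     mov rax, 0x7ffd12345678
    //     mov ecx, dword ptr [rax]
    //
    // Address contained (FitsInAddrBase succeeded):
    //     mov ecx, dword ptr [rip + disp32]   ; shorter, and no scratch register
    //
    extern int g_counter;   // any global; its access can take the contained form

    int ReadCounter()
    {
        return g_counter;   // typically a single RIP-relative load on x64
    }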
MakeSrcContained(indirTree, addr); } else if (addr->OperGet() == GT_LEA) @@ -2305,7 +2415,7 @@ void Lowering::HandleIndirAddressExpression(GenTree* indirTree, GenTree* addr) info->srcCount--; } else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) - && !(modifiedSources = AreSourcesPossiblyModified(indirTree, base, index))) + && !(modifiedSources = AreSourcesPossiblyModified(indirTree, base, index))) { // An addressing mode will be constructed that may cause some // nodes to not need a register, and cause others' lifetimes to be extended @@ -2425,6 +2535,7 @@ void Lowering::LowerCmp(GenTreePtr tree) #if !defined(_TARGET_64BIT_) // Long compares will consume GT_LONG nodes, each of which produces two results. // Thus for each long operand there will be an additional source. + // TODO-X86-CQ: Mark hiOp2 and loOp2 as contained if it is a constant or a memory op. if (varTypeIsLong(op1Type)) { info->srcCount++; @@ -2619,10 +2730,74 @@ void Lowering::LowerCmp(GenTreePtr tree) GenTreePtr andOp1 = op1->gtOp.gtOp1; if (andOp1->isMemoryOp()) { + // If the type of value memoryOp (andOp1) is not the same as the type of constant (andOp2) + // check to see whether it is safe to mark AndOp1 as contained. For e.g. in the following + // case it is not safe to mark andOp1 as contained + // AndOp1 = signed byte and andOp2 is an int constant of value 512. + // + // If it is safe, we update the type and value of andOp2 to match with andOp1. + bool containable = (andOp1->TypeGet() == op1->TypeGet()); + if (!containable) + { + ssize_t newIconVal = 0; + + switch (andOp1->TypeGet()) + { + default: + break; + case TYP_BYTE: + newIconVal = (signed char)andOp2CnsVal; + containable = FitsIn<signed char>(andOp2CnsVal); + break; + case TYP_BOOL: + case TYP_UBYTE: + newIconVal = andOp2CnsVal & 0xFF; + containable = true; + break; + case TYP_SHORT: + newIconVal = (signed short)andOp2CnsVal; + containable = FitsIn<signed short>(andOp2CnsVal); + break; + case TYP_CHAR: + newIconVal = andOp2CnsVal & 0xFFFF; + containable = true; + break; + case TYP_INT: + newIconVal = (INT32)andOp2CnsVal; + containable = FitsIn<INT32>(andOp2CnsVal); + break; + case TYP_UINT: + newIconVal = andOp2CnsVal & 0xFFFFFFFF; + containable = true; + break; + +#ifdef _TARGET_64BIT_ + case TYP_LONG: + newIconVal = (INT64)andOp2CnsVal; + containable = true; + break; + case TYP_ULONG: + newIconVal = (UINT64)andOp2CnsVal; + containable = true; + break; +#endif //_TARGET_64BIT_ + } + + + if (containable) + { + andOp2->gtType = andOp1->TypeGet(); + andOp2->AsIntConCommon()->SetIconValue(newIconVal); + } + } + // Mark the 'andOp1' memory operand as contained // Note that for equality comparisons we don't need // to deal with any signed or unsigned issues. - MakeSrcContained(op1, andOp1); + if (containable) + { + MakeSrcContained(op1, andOp1); + } } // Mark the 'op1' (the GT_AND) operand as contained MakeSrcContained(tree, op1); @@ -2845,70 +3020,166 @@ void Lowering::LowerCast( GenTreePtr* ppTree) } } -/** Lower StoreInd takes care of recognizing the cases where we have a treeNode with the following - * structure: - * storeInd(gtInd(subTreeA), binOp(gtInd(subTreeA), subtreeB) or - * storeInd(gtInd(subTreeA), binOp(subtreeB, gtInd(subTreeA)) for the case of commutative - * operations. 
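The narrowing above makes a test-against-memory safe when the mask constant is wider than the memory operand: the immediate is truncated or sign-extended to the operand's type first, and containment is skipped if the value would not survive. A small standalone illustration of the guard for a signed-byte operand:

    #include <cstdint>
    #include <limits>

    // Mirrors the FitsIn<signed char> guard: can the constant be folded into a
    // byte-sized test against memory?
    bool FitsInSignedByte(int64_t value)
    {
        return value >= std::numeric_limits<int8_t>::min() &&
               value <= std::numeric_limits<int8_t>::max();
    }

    // Example: (flags & 0x40) == 0 on a byte-sized field can become
    //     test byte ptr [mem], 0x40
    // whereas a mask of 512 cannot be narrowed to a byte, so the memory
    // operand is loaded and widened instead.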
- * - * In x86/x64 this storeInd pattern can be effectively encoded in a single instruction of the - * form in case of integer operations: - * binOp [addressing mode], regSubTreeB - * where regSubTreeB is the register where subTreeB was computed. - * - * If the recognition is successful, we mark all the nodes under the storeInd node as contained so codeGen - * will generate the single instruction discussed above. - * - * Right now, we recognize few cases: - * a) The gtIndir child is a lclVar - * b) A constant - * c) An lea. - * d) BinOp is either add, sub, xor, or, and, shl, rsh, rsz. - * - * TODO-CQ: Enable support for more complex indirections (if needed) or use the value numbering - * package to perform more complex tree recognition. - * - * TODO-XArch-CQ: Add support for RMW of lcl fields (e.g. lclfield binop= source) - * - * Return value: In case we recognize the tree pattern, we return true to specify lower we're - * finished and no further code needs to be run in order to lower this type of node. - */ -bool Lowering::LowerStoreInd(GenTreePtr tree) + //---------------------------------------------------------------------------------------------- + // Returns true if this tree is bin-op of a GT_STOREIND of the following form + // storeInd(subTreeA, binOp(gtInd(subTreeA), subtreeB)) or + // storeInd(subTreeA, binOp(subtreeB, gtInd(subTreeA)) in case of commutative bin-ops + // + // The above form for storeInd represents a read-modify-write memory binary operation. + // + // Parameters + // tree - GentreePtr of binOp + // + // Return Value + // True if 'tree' is part of a RMW memory operation pattern + // +bool Lowering::IsBinOpInRMWStoreInd(GenTreePtr tree) { - assert(tree->OperGet() == GT_STOREIND); + // Must be a non floating-point type binary operator since SSE2 doesn't support RMW memory ops + assert(!varTypeIsFloating(tree)); + assert(GenTree::OperIsBinary(tree->OperGet())); + + // Cheap bail out check before more expensive checks are performed. + // RMW memory op pattern requires that one of the operands of binOp to be GT_IND. + if (tree->gtGetOp1()->OperGet() != GT_IND && tree->gtGetOp2()->OperGet() != GT_IND) + { + return false; + } + + GenTreePtr parent = tree->gtGetParent(nullptr); + if (parent == nullptr || parent->OperGet() != GT_STOREIND || parent->gtGetOp2() != tree) + { + return false; + } - // SSE2 doesn't support RMW operations on float/double types. + // Since it is not relatively cheap to recognize RMW memory op pattern, we + // cache the result in GT_STOREIND node so that while lowering GT_STOREIND + // we can use the result. + GenTreePtr indirCandidate = nullptr; + GenTreePtr indirOpSource = nullptr; + return IsRMWMemOpRootedAtStoreInd(parent, &indirCandidate, &indirOpSource); +} + + //---------------------------------------------------------------------------------------------- + // This method recognizes the case where we have a treeNode with the following structure: + // storeInd(IndirDst, binOp(gtInd(IndirDst), indirOpSource)) OR + // storeInd(IndirDst, binOp(indirOpSource, gtInd(IndirDst)) in case of commutative operations OR + // storeInd(IndirDst, unaryOp(gtInd(IndirDst)) in case of unary operations + // + // Terminology: + // indirDst = memory write of an addr mode (i.e. storeind destination) + // indirSrc = value being written to memory (i.e. storeind source which could either be a binary or unary op) + // indirCandidate = memory read i.e. a gtInd of an addr mode + // indirOpSource = source operand used in binary/unary op (i.e. 
source operand of indirSrc node) + // + // In x86/x64 this storeInd pattern can be effectively encoded in a single instruction of the + // following form in case of integer operations: + // binOp [addressing mode], RegIndirOpSource + // binOp [addressing mode], immediateVal + // where RegIndirOpSource is the register where indirOpSource was computed. + // + // Right now, we recognize few cases: + // a) The gtInd child is a lea/lclVar/lclVarAddr/clsVarAddr/constant + // b) BinOp is either add, sub, xor, or, and, shl, rsh, rsz. + // c) unaryOp is either not/neg + // + // Implementation Note: The following routines need to be in sync for RMW memory op optimization + // to be correct and functional. + // IndirsAreEquivalent() + // NodesAreEquivalentLeaves() + // Codegen of GT_STOREIND and genCodeForShift() + // emitInsRMW() + // + // TODO-CQ: Enable support for more complex indirections (if needed) or use the value numbering + // package to perform more complex tree recognition. + // + // TODO-XArch-CQ: Add support for RMW of lcl fields (e.g. lclfield binop= source) + // + // Parameters: + // tree - GT_STOREIND node + // outIndirCandidate - out param set to indirCandidate as described above + // ouutIndirOpSource - out param set to indirOpSource as described above + // + // Return value + // True if there is a RMW memory operation rooted at a GT_STOREIND tree + // and out params indirCandidate and indirOpSource are set to non-null values. + // Otherwise, returns false with indirCandidate and indirOpSource set to null. + // Also updates flags of GT_STOREIND tree with its RMW status. + // +bool Lowering::IsRMWMemOpRootedAtStoreInd(GenTreePtr tree, GenTreePtr *outIndirCandidate, GenTreePtr *outIndirOpSource) +{ assert(!varTypeIsFloating(tree)); + assert(outIndirCandidate != nullptr); + assert(outIndirOpSource != nullptr); + + *outIndirCandidate = nullptr; + *outIndirOpSource = nullptr; + + // Early out if storeInd is already known to be a non-RMW memory op + GenTreeStoreInd* storeInd = tree->AsStoreInd(); + if (storeInd->IsNonRMWMemoryOp()) + { + return false; + } - GenTreePtr indirDst = tree->gtGetOp1(); - GenTreePtr indirSrc = tree->gtGetOp2(); + GenTreePtr indirDst = storeInd->gtGetOp1(); + GenTreePtr indirSrc = storeInd->gtGetOp2(); + genTreeOps oper = indirSrc->OperGet(); + + // Early out if it is already known to be a RMW memory op + if (storeInd->IsRMWMemoryOp()) + { + if (GenTree::OperIsBinary(oper)) + { + if (storeInd->IsRMWDstOp1()) + { + *outIndirCandidate = indirSrc->gtGetOp1(); + *outIndirOpSource = indirSrc->gtGetOp2(); + } + else + { + assert(storeInd->IsRMWDstOp2()); + *outIndirCandidate = indirSrc->gtGetOp2(); + *outIndirOpSource = indirSrc->gtGetOp1(); + } + assert(IndirsAreEquivalent(*outIndirCandidate, storeInd)); + } + else + { + assert(GenTree::OperIsUnary(oper)); + assert(IndirsAreEquivalent(indirSrc->gtGetOp1(), storeInd)); + *outIndirCandidate = indirSrc->gtGetOp1(); + *outIndirOpSource = indirSrc->gtGetOp1(); + } + + return true; + } - const genTreeOps oper = indirSrc->OperGet(); + // If reached here means that we do not know RMW status of tree rooted at storeInd + assert(storeInd->IsRMWStatusUnknown()); + // Early out if indirDst is not one of the supported memory operands. 
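// ---------------------------------------------------------------------------------
// Editor's illustrative sketch (not part of this commit): the tree shape described in
// the comment above, over a deliberately tiny stand-in node type. Every name here is
// hypothetical, and SameLocation() papers over what IndirsAreEquivalent() really checks.
// Only commutative binary ops are modeled, so both operand orders are accepted.
enum class ToyOp { StoreInd, Ind, Add, And, Not, LclAddr };

struct ToyNode
{
    ToyOp    op;
    int      lclNum = -1;     // which local the address names (for LclAddr)
    ToyNode* op1    = nullptr;
    ToyNode* op2    = nullptr;
};

static bool SameLocation(const ToyNode* a, const ToyNode* b)
{
    // Stand-in for IndirsAreEquivalent(): same kind of address naming the same local.
    return (a != nullptr) && (b != nullptr) &&
           (a->op == ToyOp::LclAddr) && (b->op == ToyOp::LclAddr) && (a->lclNum == b->lclNum);
}

// Matches:  *addr = (*addr) binop src,  *addr = src binop (*addr),  *addr = unop(*addr)
static bool LooksLikeRMWStore(const ToyNode* store)
{
    if (store->op != ToyOp::StoreInd)
        return false;

    const ToyNode* addr = store->op1;   // destination address
    const ToyNode* src  = store->op2;   // value being stored

    if (src->op == ToyOp::Not)          // unary form
    {
        return (src->op1->op == ToyOp::Ind) && SameLocation(src->op1->op1, addr);
    }
    if (src->op == ToyOp::Add || src->op == ToyOp::And)   // commutative binary form
    {
        if ((src->op1->op == ToyOp::Ind) && SameLocation(src->op1->op1, addr)) return true;
        if ((src->op2->op == ToyOp::Ind) && SameLocation(src->op2->op1, addr)) return true;
    }
    return false;
}
// ---------------------------------------------------------------------------------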
if (indirDst->OperGet() != GT_LEA && indirDst->OperGet() != GT_LCL_VAR && indirDst->OperGet() != GT_LCL_VAR_ADDR && - indirDst->OperGet() != GT_CLS_VAR_ADDR) + indirDst->OperGet() != GT_CLS_VAR_ADDR && + indirDst->OperGet() != GT_CNS_INT) { - JITDUMP("Lower of StoreInd didn't mark the node as self contained\n"); - JITDUMP("because the type of indirection in the left hand side \n"); - JITDUMP("is not yet supported:\n"); - DISPTREE(indirDst); + storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR); + return false; + } + + // We can not use Read-Modify-Write instruction forms with overflow checking instructions + // because we are not allowed to modify the target until after the overflow check. + if (indirSrc->gtOverflowEx()) + { + storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER); return false; } if (GenTree::OperIsBinary(oper)) { - if (indirSrc->gtOverflowEx()) - { - // We can not use Read-Modify-Write instruction forms with overflow checking instructions - // because we are not allowed to modify the target until after the overflow check. - // - JITDUMP("Lower of StoreInd cannot lower overflow checking instructions into RMW forms\n"); - DISPTREE(indirDst); - return false; - } - + // Return if binary op is not one of the supported operations for RMW of memory. if (oper != GT_ADD && oper != GT_SUB && oper != GT_AND && @@ -2920,124 +3191,164 @@ bool Lowering::LowerStoreInd(GenTreePtr tree) oper != GT_ROL && oper != GT_ROR) { - JITDUMP("Lower of StoreInd didn't mark the node as self contained\n"); - JITDUMP("because the node operator not yet supported:\n"); - DISPTREE(indirSrc); + storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER); return false; } if ((oper == GT_LSH || - oper == GT_RSH || - oper == GT_RSZ || - oper == GT_ROL || - oper == GT_ROR) && - varTypeIsSmall(tree)) - { - //In ldind, Integer values smaller than 4 bytes, a boolean, or a character converted to 4 bytes by sign or zero-extension as appropriate. - //If directly shift the short type data using sar, we will lose the sign or zero-extension bits. This will generate the wrong code. + oper == GT_RSH || + oper == GT_RSZ || + oper == GT_ROL || + oper == GT_ROR) && + varTypeIsSmall(storeInd)) + { + // In ldind, Integer values smaller than 4 bytes, a boolean, or a character converted to 4 bytes + // by sign or zero-extension as appropriate. If we directly shift the short type data using sar, we + // will lose the sign or zero-extension bits. 
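// ---------------------------------------------------------------------------------
// Editor's worked example (not part of this commit) of the small-type problem noted
// above, assuming arithmetic right shift for signed values (what x86 SAR does).
// ldind.u1 zero-extends the byte to 32 bits, so the shift is defined on 0x00000080;
// an RMW "sar byte ptr [mem], 1" would shift the raw byte instead, replicating its
// top bit and storing a different result.
#include <cassert>
#include <cstdint>

int main()
{
    uint8_t mem = 0x80;

    int32_t widened  = mem;                                  // 0x00000080 after ldind.u1
    uint8_t expected = static_cast<uint8_t>(widened >> 1);   // 0x40: shift the widened value

    uint8_t rmwByte  = static_cast<uint8_t>(static_cast<int8_t>(mem) >> 1);   // 0xC0: SAR on the byte itself

    assert(expected == 0x40 && rmwByte == 0xC0 && expected != rmwByte);
    return 0;
}
// ---------------------------------------------------------------------------------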
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_TYPE); return false; } GenTreePtr rhsLeft = indirSrc->gtGetOp1(); GenTreePtr rhsRight = indirSrc->gtGetOp2(); - GenTreePtr indirCandidate = nullptr; - GenTreePtr indirOpSource = nullptr; - - if (rhsLeft->OperGet() == GT_IND && - rhsLeft->gtGetOp1()->OperGet() == indirDst->OperGet() && - IsSafeToContainMem(indirSrc, rhsLeft)) - { - indirCandidate = rhsLeft; - indirOpSource = rhsRight; - } - else if (GenTree::OperIsCommutative(oper) && - rhsRight->OperGet() == GT_IND && - rhsRight->gtGetOp1()->OperGet() == indirDst->OperGet()) - { - indirCandidate = rhsRight; - indirOpSource = rhsLeft; - } - - if (indirCandidate == nullptr && - indirOpSource == nullptr) + // The most common case is rhsRight is GT_IND + if (GenTree::OperIsCommutative(oper) && + rhsRight->OperGet() == GT_IND && + rhsRight->gtGetOp1()->OperGet() == indirDst->OperGet() && + IndirsAreEquivalent(rhsRight, storeInd)) { - JITDUMP("Lower of StoreInd didn't mark the node as self contained\n"); - JITDUMP("because the indirections don't match or the operator is not commutative\n"); - DISPTREE(tree); - return false; - } - - if (IndirsAreEquivalent(indirCandidate, tree)) - { - JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n"); - tree->gtLsraInfo.srcCount = indirOpSource->gtLsraInfo.dstCount; - SetStoreIndOpCounts(tree, indirCandidate); + *outIndirCandidate = rhsRight; + *outIndirOpSource = rhsLeft; + storeInd->SetRMWStatus(STOREIND_RMW_DST_IS_OP2); return true; } - else + else if (rhsLeft->OperGet() == GT_IND && + rhsLeft->gtGetOp1()->OperGet() == indirDst->OperGet() && + IsSafeToContainMem(indirSrc, rhsLeft) && + IndirsAreEquivalent(rhsLeft, storeInd)) { - JITDUMP("Lower of StoreInd didn't mark the node as self contained\n"); - JITDUMP("because the indirections are not equivalent.\n"); - DISPTREE(tree); - return false; + *outIndirCandidate = rhsLeft; + *outIndirOpSource = rhsRight; + storeInd->SetRMWStatus(STOREIND_RMW_DST_IS_OP1); + return true; } - } + + storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR); + return false; + } else if (GenTree::OperIsUnary(oper)) { - // Nodes other than GT_NOT and GT_NEG are not yet supported - // so we bail for now. + // Nodes other than GT_NOT and GT_NEG are not yet supported. if (oper != GT_NOT && oper != GT_NEG) + { + storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER); return false; + } - // If the operand of the GT_NOT | GT_NEG is not an indirection, - // then this is not a RMW pattern. if (indirSrc->gtGetOp1()->OperGet() != GT_IND) + { + storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR); return false; + } - // We have a GT_IND below the NOT/NEG, so we attempt to recognize - // the RMW pattern. 
GenTreePtr indirCandidate = indirSrc->gtGetOp1(); - if (IndirsAreEquivalent(indirCandidate, tree)) + if (indirCandidate->gtGetOp1()->OperGet() == indirDst->OperGet() && + IndirsAreEquivalent(indirCandidate, storeInd)) { - JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n"); - tree->gtLsraInfo.srcCount = 0; - SetStoreIndOpCounts(tree, indirCandidate); + // src and dest are the same in case of unary ops + *outIndirCandidate = indirCandidate; + *outIndirOpSource = indirCandidate; + storeInd->SetRMWStatus(STOREIND_RMW_DST_IS_OP1); return true; } - else - { - JITDUMP("Lower of StoreInd didn't mark the node as self contained\n"); - JITDUMP("because the indirections are not equivalent.\n"); - DISPTREE(tree); - return false; - } - } - else - { - JITDUMP("Lower of StoreInd didn't mark the node as self contained\n"); - JITDUMP("because the operator on the right hand side of the indirection is not\n"); - JITDUMP("a binary or unary operator.\n"); - DISPTREE(tree); - return false; } -} -void Lowering::LowerRotate(GenTreePtr tree) -{ - // xarch supports both ROL and ROR instructions so no lowering is required. + assert(*outIndirCandidate == nullptr); + assert(*outIndirOpSource == nullptr); + storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER); + return false; } -void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate) + //-------------------------------------------------------------------------------------------- + // SetStoreIndOpCountsIfRMWMemOp checks to see if there is a RMW memory operation rooted at + // GT_STOREIND node and if so will mark register requirements for nodes under storeInd so + // that CodeGen will generate a single instruction of the form: + // + // binOp [addressing mode], reg + // + // Parameters + // storeInd - GT_STOREIND node + // + // Return value + // True, if RMW memory op tree pattern is recognized and op counts are set. + // False otherwise. + // +bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd) { + assert(storeInd->OperGet() == GT_STOREIND); + + // SSE2 doesn't support RMW on float values + assert(!varTypeIsFloating(storeInd)); + + // Terminology: + // indirDst = memory write of an addr mode (i.e. storeind destination) + // indirSrc = value being written to memory (i.e. storeind source which could a binary/unary op) + // indirCandidate = memory read i.e. a gtInd of an addr mode + // indirOpSource = source operand used in binary/unary op (i.e. 
source operand of indirSrc node) + + GenTreePtr indirCandidate = nullptr; + GenTreePtr indirOpSource = nullptr; + + if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource)) + { + JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n", storeInd->AsStoreInd()->GetRMWStatus()); + DISPTREE(storeInd); + return false; + } + GenTreePtr indirDst = storeInd->gtGetOp1(); GenTreePtr indirSrc = storeInd->gtGetOp2(); - TreeNodeInfo* info = &(storeInd->gtLsraInfo); + genTreeOps oper = indirSrc->OperGet(); + + // At this point we have successfully detected a RMW memory op of one of the following forms + // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR + // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR + // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations + // + // Here indirSrc = one of the supported binary or unary operation for RMW of memory + // indirCandidate = a GT_IND node + // indirCandidateChild = operand of GT_IND indirCandidate + // + // The logic below essentially does the following + // set storeInd src count to that of the dst count of indirOpSource + // clear operand counts on indirSrc (i.e. marked as contained and storeInd will generate code for it) + // clear operand counts on indirCandidate + // clear operand counts on indirDst except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr base + // Increment src count of storeInd to account for the registers required to form indirDst addr mode + // clear operand counts on indirCandidateChild + TreeNodeInfo* info = &(storeInd->gtLsraInfo); info->dstCount = 0; + if (GenTree::OperIsBinary(oper)) + { + // On Xarch RMW operations require that the source memory-op be in a register. + assert(!indirOpSource->isMemoryOp() || indirOpSource->gtLsraInfo.dstCount == 1); + JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n"); + info->srcCount = indirOpSource->gtLsraInfo.dstCount; + } + else + { + assert(GenTree::OperIsUnary(oper)); + JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n"); + info->srcCount = 0; + } + DISPTREE(storeInd); + m_lsra->clearOperandCounts(indirSrc); m_lsra->clearOperandCounts(indirCandidate); + GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1(); if (indirCandidateChild->OperGet() == GT_LEA) { @@ -3059,18 +3370,35 @@ void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidat m_lsra->clearOperandCounts(indirDst); } - else + else { - assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR); - info->srcCount += indirCandidateChild->gtLsraInfo.dstCount; + assert(indirCandidateChild->OperGet() == GT_LCL_VAR || + indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || + indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || + indirCandidateChild->OperGet() == GT_CNS_INT); + // If it is a GT_LCL_VAR, it still needs the reg to hold the address. - // However for GT_CLS_VAR_ADDR, we don't need that reg to hold the address, because field address value is known at this time. - if(indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR) + // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base. + // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit time. + // Also, we don't need a reg for GT_CLS_VAR_ADDR. 
+ if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR) + { + m_lsra->clearOperandCounts(indirDst); + } + else if (indirCandidateChild->IsCnsIntOrI() && + indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp)) { m_lsra->clearOperandCounts(indirDst); } + else + { + // Need a reg and hence increment src count of storeind + info->srcCount += indirCandidateChild->gtLsraInfo.dstCount; + } } m_lsra->clearOperandCounts(indirCandidateChild); + + return true; } /** @@ -3092,10 +3420,18 @@ void Lowering::SetMulOpCounts(GenTreePtr tree) // Case of float/double mul. if (varTypeIsFloating(tree->TypeGet())) { + assert(tree->OperGet() == GT_MUL); + if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl()) { MakeSrcContained(tree, op2); } + else if (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))) + { + // Since GT_MUL is commutative, we will try to re-order operands if it is safe to + // generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp) + MakeSrcContained(tree, op1); + } return; } @@ -3232,9 +3568,16 @@ bool Lowering::IsCallTargetInRange(void* addr) bool Lowering:: IsContainableImmed(GenTree* parentNode, GenTree* childNode) { if (!childNode->IsIntCnsFitsInI32()) + { return false; - if (childNode->IsIconHandle() && comp->opts.compReloc) + } + + // At this point we know that it is an int const fits within 4-bytes and hence can safely cast to IntConCommon. + // Icons that need relocation should never be marked as contained immed + if (childNode->AsIntConCommon()->ImmedValNeedsReloc(comp)) + { return false; + } return true; } diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 5515f83dc9..78815633cc 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -1783,29 +1783,6 @@ void LinearScan::identifyCandidates() varDsc->lvOtherReg = REG_STK; #endif // _TARGET_64BIT_ -#ifdef FEATURE_SIMD - // Set the SIMD type appropriately, according to whether the size is the full - // vector register length, or some subset (e.g. Vector2f on SSE2 and - // all the fixed types on AVX). - if (varDsc->lvIsSIMDType() && (varDsc->TypeGet() != TYP_BYREF)) - { - // If this is a reg arg that's passed fully in a register, don't change the type - // as it has been passed in an integer register, and we'll mess up the prolog - // handling if we change its type to TYP_DOUBLE. - // Instead, we'll lave it is TYP_STRUCT and not enregister it. - // TODO-XArch-CQ: Improve the handling of these. - // We currently also have issues with the morpher transforming pointer-size structs - // into longs in unanticipated ways, so for now we will not enregister these types. - if (varDsc->lvSize() > TARGET_POINTER_SIZE) - { - var_types simdType = compiler->getSIMDTypeForSize(varDsc->lvSize()); - varDsc->lvType = simdType; - newInt->registerType = simdType; - newInt->registerPreferences = allRegs(simdType); - } - } -#endif // FEATURE_SIMD - #if !defined(_TARGET_64BIT_) if(intervalType == TYP_LONG) { @@ -1838,14 +1815,13 @@ void LinearScan::identifyCandidates() varDsc->lvLRACandidate = 0; } - // Variables that are address-exposed, and all struct locals, are never enregistered, or tracked. - // (The struct may be promoted, and its field variables enregistered/tracked, or the VM may "normalize" - // its type so that its not seen by the JIT as a struct.) + // Variables that are address-exposed are never enregistered, or tracked. + // A struct may be promoted, and a struct that fits in a register may be fully enregistered. 
// Pinned variables may not be tracked (a condition of the GCInfo representation) // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning") // references when using the general GC encoding. - if (varDsc->lvAddrExposed || (varDsc->lvType == TYP_STRUCT && !varDsc->lvIsSIMDType())) + if (varDsc->lvAddrExposed || !varTypeIsEnregisterableStruct(varDsc)) { varDsc->lvLRACandidate = 0; #ifdef DEBUG @@ -1897,12 +1873,21 @@ void LinearScan::identifyCandidates() case TYP_LONG: case TYP_REF: case TYP_BYREF: + break; + #ifdef FEATURE_SIMD case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: -#endif // FEATURE_SIMD + if (varDsc->lvPromoted) + { + varDsc->lvLRACandidate = 0; + } break; + // TODO-1stClassStructs: Move TYP_SIMD8 up with the other SIMD types, after handling the param issue + // (passing & returning as TYP_LONG). + case TYP_SIMD8: +#endif // FEATURE_SIMD case TYP_STRUCT: { @@ -2476,6 +2461,7 @@ LinearScan::getKillSetForNode(GenTree* tree) } } break; + case GT_LSH: case GT_RSH: case GT_RSZ: @@ -2496,8 +2482,7 @@ LinearScan::getKillSetForNode(GenTree* tree) { killMask = RBM_INT_CALLEE_TRASH; } -#if (defined(_TARGET_AMD64_) || defined (_TARGET_ARM_)) && !defined(RYUJIT_CTPBUILD) - +#if defined(_TARGET_AMD64_) || defined (_TARGET_ARM_) // AMD (and ARM) helpers for this save the return value killMask &= ~(RBM_INTRET | RBM_FLOATRET); #endif @@ -2985,7 +2970,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, // We can have a case where the source of the store has a different register type, // e.g. when the store is of a return value temp, and op1 is a Vector2 - // (8-byte SIMD, which is TYP_DOUBLE at this point). We will need to set the + // (TYP_SIMD8). We will need to set the // src candidates accordingly on op1 so that LSRA will generate a copy. // We could do this during Lowering, but at that point we don't know whether // this lclVar will be a register candidate, and if not, we would prefer to leave @@ -3077,6 +3062,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, regMaskTP candidates = getUseCandidates(useNode); Interval *i = locInfo.interval; +#ifdef FEATURE_SIMD if (tree->OperIsLocalStore() && varDefInterval == nullptr) { // This is a non-candidate store. If this is a SIMD type, the use candidates @@ -3086,11 +3072,14 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, { noway_assert((candidates & allRegs(useNode->gtType)) != RBM_NONE); // Currently, the only case where this should happen is for a TYP_LONG - // source and a TYP_DOUBLE target. - assert(useNode->gtType == TYP_LONG && tree->gtType == TYP_DOUBLE); + // source and a TYP_SIMD8 target. 
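// ---------------------------------------------------------------------------------
// Editor's illustrative note (not part of this commit): why a TYP_LONG <-> TYP_SIMD8
// retype here is a plain bit-preserving move. An 8-byte vector such as Vector2 (two
// floats) is exactly the size of one 64-bit integer register, which is how it is
// passed and returned; the names below are stand-ins, not JIT types.
#include <cstdint>
#include <cstring>

struct Vector2Bits { float x, y; };
static_assert(sizeof(Vector2Bits) == sizeof(uint64_t), "an 8-byte SIMD value is GPR-sized");

static uint64_t Simd8ToGpr(Vector2Bits v)   // the TYP_SIMD8 -> TYP_LONG direction
{
    uint64_t bits;
    std::memcpy(&bits, &v, sizeof(bits));   // no conversion, just a register-file move
    return bits;
}
// ---------------------------------------------------------------------------------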
+ assert((useNode->gtType == TYP_LONG && tree->gtType == TYP_SIMD8) || + (useNode->gtType == TYP_SIMD8 && tree->gtType == TYP_LONG)); tree->gtType = useNode->gtType; } } +#endif // FEATURE_SIMD + bool delayRegFree = (hasDelayFreeSrc && useNode->gtLsraInfo.isDelayFree); if (useNode->gtLsraInfo.isTgtPref) { @@ -3128,7 +3117,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, regNumber physicalReg = genRegNumFromMask(fixedAssignment); RefPosition *pos = newRefPosition (physicalReg, currentLoc, RefTypeFixedReg, nullptr, fixedAssignment); } - pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, i->recentRefPosition->registerAssignment); + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType)); pos->registerAssignment = candidates; } else @@ -3408,8 +3397,12 @@ LinearScan::updateRegStateForArg(LclVarDsc* argDsc) ); #ifdef _TARGET_ARM_ - if (argDsc->lvIsHfaRegArg) isFloat = true; + if (argDsc->lvIsHfaRegArg) + { + isFloat = true; + } #endif // _TARGET_ARM_ + if (isFloat) { JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); @@ -3418,6 +3411,12 @@ LinearScan::updateRegStateForArg(LclVarDsc* argDsc) else { JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); +#if FEATURE_MULTIREG_STRUCT_ARGS + if (argDsc->lvOtherArgReg != REG_NA) + { + JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg)); + } +#endif compiler->raUpdateRegStateForArg(intRegState, argDsc); } } @@ -3624,7 +3623,7 @@ LinearScan::buildIntervals() } RefPosition * pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); } - else if (argDsc->lvType == TYP_STRUCT) + else if (varTypeIsStruct(argDsc->lvType)) { for (unsigned fieldVarNum = argDsc->lvFieldLclStart; fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; @@ -4042,7 +4041,18 @@ LinearScan::setFrameType() // frame if needed. Note that this feature isn't on for amd64, because the stack is // always double-aligned by default. compiler->codeGen->setDoubleAlign(false); -#endif + + // TODO-CQ: Tune this (see regalloc.cpp, in which raCntWtdStkDblStackFP is used to + // determine whether to double-align). Note, though that there is at least one test + // (jit\opt\Perf\DoubleAlign\Locals.exe) that depends on double-alignment being set + // in certain situations. + if (!compiler->opts.MinOpts() && + !compiler->codeGen->isFramePointerRequired() && + compiler->compFloatingPointUsed) + { + frameType = FT_DOUBLE_ALIGN_FRAME; + } +#endif // DOUBLE_ALIGN switch (frameType) { @@ -4245,7 +4255,7 @@ LinearScan::getRegisterType(Interval *currentInterval, RefPosition* refPosition) #if defined(FEATURE_SIMD) && defined(_TARGET_AMD64_) if ((candidates & allRegs(regType)) == RBM_NONE) { - assert((regType == TYP_DOUBLE) && + assert((regType == TYP_SIMD8) && (refPosition->refType == RefTypeUse) && ((candidates & allRegs(TYP_INT)) != RBM_NONE)); regType = TYP_INT; @@ -4420,6 +4430,7 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi // position, which is one location past the use (getRefEndLocation() takes care of this). LsraLocation rangeEndLocation = rangeEndRefPosition->getRefEndLocation(); LsraLocation lastLocation = lastRefPosition->getRefEndLocation(); + regNumber prevReg = REG_NA; if (currentInterval->assignedReg) { @@ -4431,7 +4442,7 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi // only if it is preferred and available. 
RegRecord *regRec = currentInterval->assignedReg; - regNumber prevReg = regRec->regNum; + prevReg = regRec->regNum; regMaskTP prevRegBit = genRegMask(prevReg); // Is it in the preferred set of regs? @@ -4512,13 +4523,54 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi // An optimization for the common case where there is only one candidate - // avoid looping over all the other registers - regNumber singleReg; + regNumber singleReg = REG_NA; if (genMaxOneBit(candidates)) { regOrderSize = 1; singleReg = genRegNumFromMask(candidates); regOrder = &singleReg; } +#if FEATURE_MULTIREG_STRUCTS + if (regType == TYP_STRUCT) + { +#ifdef _TARGET_ARM64_ + // For now we can special case this case as it is used to + // pass arguments in pairs of consecutive registers + // + // TODO ARM64 - this is not complete and is really just a workaround + // that allows us to pass 16-byte structs in argment registers + // Additional work is require to properly reserve the second register + // + if (genCountBits(candidates) == 2) + { + // We currently are only expecting to handle setting up argument registers + // with this code sequence + // So both register bits in candidates should be arg registers + // + if ((candidates & RBM_ARG_REGS) == candidates) + { + // Make sure that we have two consecutive registers available + regMaskTP lowRegBit = genFindLowestBit(candidates); + regMaskTP nextRegBit = lowRegBit << 1; + if (candidates == (lowRegBit | nextRegBit)) + { + // We use the same trick as above when regOrderSize, singleReg and regOrder are set + regOrderSize = 1; + singleReg = genRegNumFromMask(lowRegBit); + regOrder = &singleReg; + } + } + } +#endif + // Unless we setup singleReg we have to issue an NYI error here + if (singleReg == REG_NA) + { + // Need support for MultiReg sized structs + NYI("Multireg struct - LinearScan::tryAllocateFreeReg"); + } + + } +#endif // FEATURE_MULTIREG_STRUCTS for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++) { @@ -4577,7 +4629,19 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi if ((refPosition->treeNode->AsIntCon()->IconValue() == otherTreeNode->AsIntCon()->IconValue()) && (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode))) { - score |= VALUE_AVAILABLE; +#ifdef _TARGET_64BIT_ + // If the constant is negative, only reuse registers of the same type. + // This is because, on a 64-bit system, we do not sign-extend immediates in registers to + // 64-bits unless they are actually longs, as this requires a longer instruction. + // This doesn't apply to a 32-bit system, on which long values occupy multiple registers. + // (We could sign-extend, but we would have to always sign-extend, because if we reuse more + // than once, we won't have access to the instruction that originally defines the constant). 
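// ---------------------------------------------------------------------------------
// Editor's worked example (not part of this commit) for the negative-constant rule
// explained above. On x64 a 32-bit register write zero-extends into the full register,
// so a register loaded with the int constant -1 does not also hold the long -1.
#include <cassert>
#include <cstdint>

int main()
{
    int32_t  c32           = -1;
    uint64_t regAfterMov32 = static_cast<uint32_t>(c32);                          // 0x00000000FFFFFFFF
    uint64_t neededForLong = static_cast<uint64_t>(static_cast<int64_t>(-1));     // 0xFFFFFFFFFFFFFFFF
    assert(regAfterMov32 != neededForLong);   // so VALUE_AVAILABLE must not be scored for a long use

    // Non-negative constants have identical bits at both widths, so reuse stays legal.
    assert(static_cast<uint64_t>(static_cast<uint32_t>(42)) ==
           static_cast<uint64_t>(static_cast<int64_t>(42)));
    return 0;
}
// ---------------------------------------------------------------------------------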
+ if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) || + (refPosition->treeNode->AsIntCon()->IconValue() >= 0)) +#endif // _TARGET_64BIT_ + { + score |= VALUE_AVAILABLE; + } } break; case GT_CNS_DBL: @@ -4592,7 +4656,7 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi break; } default: - // for all other 'operTreeNodee->OperGet()' kinds, we leave 'score' unchanged + // for all other 'otherTreeNode->OperGet()' kinds, we leave 'score' unchanged break; } } @@ -4666,9 +4730,18 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi } } // If both cover the range, prefer a register that is killed sooner (leaving the longer range register available). - else if (nextPhysRefLocation > lastLocation && nextPhysRefLocation < bestLocation) + // If both cover the range and also getting killed at the same location, prefer the one which is same as previous + // assignment. + else if (nextPhysRefLocation > lastLocation) { - foundBetterCandidate = true; + if (nextPhysRefLocation < bestLocation) + { + foundBetterCandidate = true; + } + else if (nextPhysRefLocation == bestLocation && prevReg == regNum) + { + foundBetterCandidate = true; + } } } @@ -6719,6 +6792,7 @@ LinearScan::insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition) { GenTreePtr* parentChildPointer = nullptr; GenTreePtr parent = tree->gtGetParent(&parentChildPointer); + noway_assert(parent != nullptr && parentChildPointer != nullptr); // Create the new node, with "tree" as its only child. genTreeOps oper; @@ -6731,6 +6805,9 @@ LinearScan::insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition) oper = GT_COPY; } + var_types treeType = tree->TypeGet(); + +#ifdef FEATURE_SIMD // Check to see whether we need to move to a different register set. // This currently only happens in the case of SIMD vector types that are small enough (pointer size) // that they must be passed & returned in integer registers. @@ -6738,11 +6815,11 @@ LinearScan::insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition) // and refPosition->registerAssignment is the mask for the register we are moving TO. // If they don't match, we need to reverse the type for the "move" node. - var_types treeType = tree->TypeGet(); if ((allRegs(treeType) & refPosition->registerAssignment) == 0) { - treeType = (varTypeIsFloating(treeType)) ? TYP_I_IMPL : TYP_DOUBLE; + treeType = (useFloatReg(treeType)) ? 
TYP_I_IMPL : TYP_SIMD8; } +#endif // FEATURE_SIMD GenTreePtr newNode = compiler->gtNewOperNode(oper, treeType, tree); assert(refPosition->registerAssignment != RBM_NONE); @@ -6900,12 +6977,12 @@ LinearScan::recordMaxSpill() if (needDoubleTmpForFPCall || (returnType == TYP_DOUBLE)) { JITDUMP("Adding a spill temp for moving a double call/return value between xmm reg and x87 stack.\n"); - maxSpill[TYP_DOUBLE] = 1; + maxSpill[TYP_DOUBLE] += 1; } if (needFloatTmpForFPCall || (returnType == TYP_FLOAT)) { JITDUMP("Adding a spill temp for moving a float call/return value between xmm reg and x87 stack.\n"); - maxSpill[TYP_FLOAT] = 1; + maxSpill[TYP_FLOAT] += 1; } #endif // _TARGET_X86_ for (int i = 0; i < TYP_COUNT; i++) @@ -7636,7 +7713,8 @@ LinearScan::insertMove(BasicBlock * block, } else { - compiler->fgInsertStmtNearEnd(block, stmt); + assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS); + compiler->fgInsertStmtAtEnd(block, stmt); } } } @@ -7728,7 +7806,8 @@ LinearScan::insertSwap(BasicBlock* block, } else { - compiler->fgInsertStmtNearEnd(block, stmt); + assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS); + compiler->fgInsertStmtAtEnd(block, stmt); } } } @@ -7839,10 +7918,10 @@ LinearScan::addResolution(BasicBlock* block, } //------------------------------------------------------------------------ -// handleOutoingCriticalEdges: Performs the necessary resolution on all critical edges that feed out of 'block' +// handleOutgoingCriticalEdges: Performs the necessary resolution on all critical edges that feed out of 'block' // // Arguments: -// block - the block with incoming critical edges. +// block - the block with outgoing critical edges. // // Return Value: // None.. @@ -7853,7 +7932,7 @@ LinearScan::addResolution(BasicBlock* block, // and generate the resolution code into that block. void -LinearScan::handleOutoingCriticalEdges(BasicBlock* block) +LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) { VARSET_TP VARSET_INIT_NOCOPY(sameResolutionSet, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(sameLivePathsSet, VarSetOps::MakeEmpty(compiler)); @@ -7922,6 +8001,7 @@ LinearScan::handleOutoingCriticalEdges(BasicBlock* block) bool isSame = false; bool maybeSingleTarget = false; bool maybeSameLivePaths = false; + bool liveOnlyAtSplitEdge = true; regNumber sameToReg = REG_NA; for (unsigned succIndex = 0; succIndex < succCount; succIndex++) { @@ -7931,6 +8011,12 @@ LinearScan::handleOutoingCriticalEdges(BasicBlock* block) maybeSameLivePaths = true; continue; } + else if (liveOnlyAtSplitEdge) + { + // Is the var live only at those target blocks which are connected by a split edge to this block + liveOnlyAtSplitEdge = ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB)); + } + regNumber toReg = getVarReg(getInVarToRegMap(succBlock->bbNum), varNum); if (sameToReg == REG_NA) { @@ -7965,6 +8051,18 @@ LinearScan::handleOutoingCriticalEdges(BasicBlock* block) { sameToReg = REG_NA; } + + // If the var is live only at those blocks connected by a split edge and not live-in at some of the + // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution + // will be deferred to the handling of split edges, which means copy will only be at those target(s). 
+ // + // Another way to achieve similar resolution for vars live only at split edges is by removing them + // from consideration up-front but it requires that we traverse those edges anyway to account for + // the registers that must note be overwritten. + if (liveOnlyAtSplitEdge && maybeSameLivePaths) + { + sameToReg = REG_NA; + } } if (sameToReg == REG_NA) @@ -8083,7 +8181,7 @@ LinearScan::resolveEdges() } if (blockInfo[block->bbNum].hasCriticalOutEdge) { - handleOutoingCriticalEdges(block); + handleOutgoingCriticalEdges(block); } prevBlock = block; } @@ -8642,13 +8740,6 @@ void dumpRegMask(regMaskTP regs) } } - -const char *gtOpNames[] = -{ - #define GTNODE(en,sn,cm,ok) #en , - #include "gtlist.h" -}; - void RefPosition::dump() { printf("<RefPosition #%-3u @%-3u", rpNum, nodeLocation); @@ -8664,7 +8755,7 @@ void RefPosition::dump() this->getInterval()->tinyDump(); if (this->treeNode) - printf("%s ", gtOpNames[treeNode->OperGet()]); + printf("%s ", treeNode->OpName(treeNode->OperGet())); printf("BB%02u ", this->bbNum); printf("regmask="); @@ -9686,6 +9777,7 @@ LinearScan::dumpRegRecordHeader() // l is either '*' (if a last use) or ' ' (otherwise) // d is either 'D' (if a delayed use) or ' ' (otherwise) + maxNodeLocation = (maxNodeLocation == 0) ? 1: maxNodeLocation; // corner case of a method with an infinite loop without any gentree nodes assert(maxNodeLocation >= 1); assert(refPositionCount >= 1); int nodeLocationWidth = (int)log10((double)maxNodeLocation) + 1; diff --git a/src/jit/lsra.h b/src/jit/lsra.h index cef6669513..69acddc202 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -381,7 +381,7 @@ public: regNumber outReg, regNumber inReg); - void handleOutoingCriticalEdges(BasicBlock* block); + void handleOutgoingCriticalEdges(BasicBlock* block); void resolveEdge (BasicBlock* fromBlock, BasicBlock* toBlock, diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 9d032f2ab8..71f7795660 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -349,8 +349,8 @@ GenTreePtr Compiler::fgMorphCast(GenTreePtr tree) { case TYP_INT: #ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this - if ((oper->gtOper == GT_MATH) && - (oper->gtMath.gtMathFN == CORINFO_INTRINSIC_Round)) + if ((oper->gtOper == GT_INTRINSIC) && + (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)) { /* optimization: conv.i4(round.d(d)) -> round.i(d) */ oper->gtType = dstType; @@ -1444,6 +1444,7 @@ void fgArgInfo::ArgsComplete() assert(curArgTabEntry != NULL); GenTreePtr argx = curArgTabEntry->node; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // If this is a struct, mark it for needing a tempVar. 
// In the copyblk and store this should have minimal perf impact since // the local vars where we copy/store to already exist and the logic for temp @@ -1457,6 +1458,7 @@ void fgArgInfo::ArgsComplete() { curArgTabEntry->needTmp = true; } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (curArgTabEntry->regNum == REG_STK) { @@ -1471,7 +1473,7 @@ void fgArgInfo::ArgsComplete() } else // we have a register argument, next we look for a TYP_STRUCT { - if (argx->TypeGet() == TYP_STRUCT + if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct)) { hasStructRegArg = true; @@ -1996,10 +1998,13 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum // Create a copy of the temp to go into the late argument list GenTreePtr arg = gtNewLclvNode(tmpVarNum, type); -#if defined(_TARGET_AMD64_) - if (type == TYP_STRUCT) + if (varTypeIsStruct(type)) { -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + +#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + arg->gtFlags |= GTF_DONT_CSE; // If it is passed in registers, don't get the address of the var. Make it a @@ -2013,13 +2018,25 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum { arg = gtNewOperNode(GT_ADDR, TYP_STRUCT, arg); } + #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING - switch (lvaLclExactSize(tmpVarNum)) + + unsigned structSize = lvaLclExactSize(tmpVarNum); + + switch (structSize) { case 1: type = TYP_BYTE; break; case 2: type = TYP_SHORT; break; +#if defined (_TARGET_AMD64_) + case 4: type = TYP_INT; break; +#else defined(_TARGET_ARM64_) + case 3: case 4: type = TYP_INT; break; - case 8: + case 5: + case 6: + case 7: type = TYP_I_IMPL; break; +#endif // defined (_TARGET_ARM64_) + case 8: switch (*lvaGetGcLayout(tmpVarNum)) { case TYPE_GC_NONE: @@ -2043,27 +2060,55 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum // its structure doesn't support to be passed directly through a // register, so we need to pass a pointer to the destination where // where we copied the struct to. - if (type == TYP_STRUCT) + if (type == varDsc->TypeGet()) { - arg->gtFlags |= GTF_DONT_CSE; - arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); +#if FEATURE_MULTIREG_STRUCTS +#ifdef _TARGET_ARM64_ + assert(varTypeIsStruct(type)); + if (structSize <= MAX_PASS_MULTIREG_BYTES) + { + assert(structSize > TARGET_POINTER_SIZE); // structSize must be 9..16 + + // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD); + // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works. 
+ + // Pass by value in two registers + arg->gtFlags |= GTF_DONT_CSE; + arg = gtNewOperNode(GT_ADDR, TYP_BYREF, arg); + + // Ldobj the temp to use it as a call argument + arg = gtNewLdObjNode(lvaGetStruct(tmpVarNum), arg); + } + else +#endif // _TARGET_ARM64_ +#endif // FEATURE_MULTIREG_STRUCTS + { + arg->gtFlags |= GTF_DONT_CSE; + arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); + } } - else + else // type was changed from a struct to a scalar type { arg->ChangeOper(GT_LCL_FLD); arg->gtType = type; } -#endif // !!FEATURE_UNIX_AMD64_STRUCT_PASSING - } -#else // _TARGET_AMD64_ +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - arg->gtFlags |= GTF_DONT_CSE; - arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); - // Ldobj the temp to use it as a call argument - arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum)); - arg->gtFlags |= GTF_EXCEPT; +#else // not (_TARGET_AMD64_ or _TARGET_ARM64_) -#endif // _TARGET_AMD64_ + // other targets, we pass the struct by value + assert(varTypeIsStruct(type)); + + arg->gtFlags |= GTF_DONT_CSE; + arg = gtNewOperNode(GT_ADDR, TYP_BYREF, arg); + + // Ldobj the temp to use it as a call argument + arg = gtNewLdObjNode(lvaGetStruct(tmpVarNum), arg); + arg->gtFlags |= GTF_EXCEPT; + +#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_) + + } // (varTypeIsStruct(type)) return arg; } @@ -2091,7 +2136,7 @@ void fgArgInfo::EvalArgsToTemps() // Only the register arguments need to be replaced with placeholders node // stacked arguments are evaluated and pushed in order // - if (curArgTabEntry->regNum == REG_STK && !curArgTabEntry->needTmp) + if (curArgTabEntry->regNum == REG_STK) continue; #endif @@ -2241,9 +2286,9 @@ void fgArgInfo::EvalArgsToTemps() defArg = argx; - /* Create a placeholder node to put in its place in gtCallLateArgs */ + // Create a placeholder node to put in its place in gtCallLateArgs. - /* For a TYP_STRUCT we also need to record the class handle of the arg */ + // For a struct type we also need to record the class handle of the arg. CORINFO_CLASS_HANDLE clsHnd = NULL; #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) @@ -2252,7 +2297,7 @@ void fgArgInfo::EvalArgsToTemps() #else // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) - if (defArg->gtType == TYP_STRUCT) + if (varTypeIsStruct(defArg)) { // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd GenTreePtr defArgTmp = defArg; @@ -2262,7 +2307,7 @@ void fgArgInfo::EvalArgsToTemps() { defArgTmp = defArgTmp->gtOp.gtOp2; } - assert(defArgTmp->gtType == TYP_STRUCT); + assert(varTypeIsStruct(defArgTmp)); /* We handle two opcodes: GT_MKREFANY and GT_LDOBJ */ if (defArgTmp->gtOper == GT_MKREFANY) @@ -2460,7 +2505,7 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDL unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable")); - if (subTree->TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(subTree)) { assert(structType != nullptr); lvaSetStruct(lclNum, structType, false); @@ -2598,13 +2643,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) nonStandardArgs.Push(nsa); } - else if (call->IsVirtualStub() - && call->gtCallType == CT_INDIRECT) + else if (call->IsVirtualStub() && + (call->gtCallType == CT_INDIRECT) && + !call->IsTailCallViaHelper()) { // indirect VSD stubs need the base of the indirection cell to be // passed in addition. At this point that is the value in gtCallAddr. 
// The actual call target will be derived from gtCallAddr in call // lowering. + + // If it is a VSD call getting dispatched via tail call helper, + // fgMorphTailCall() would materialize stub addr as an additional + // parameter added to the original arg list and hence no need to + // add as a non-standard arg. GenTree* arg = call->gtCallAddr; if (arg->OperIsLocal()) @@ -2959,7 +3010,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) unsigned int structFloatRegs = 0; unsigned int structIntRegs = 0; #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - bool isStructArg = argx->gtType == TYP_STRUCT; + bool isStructArg = varTypeIsStruct(argx); if (lateArgsComputed) { @@ -3031,7 +3082,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #elif defined(_TARGET_ARM64_) if (isStructArg) { - // Structs are eith passed in 1 or 2 (64-bit) slots + // Structs are either passed in 1 or 2 (64-bit) slots size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; if (size > 2) { @@ -3066,18 +3117,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) size = GetHfaSlots(argx); } #endif - else // argx->gtType == TYP_STRUCT + else // struct type { /* We handle two opcodes: GT_MKREFANY and GT_LDOBJ */ if (argx->gtOper == GT_MKREFANY) { - if (argx->TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(argx)) { isStructArg = true; } #ifdef _TARGET_AMD64_ #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - if (argx->TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(argx)) { size = info.compCompHnd->getClassSize(impGetRefAnyClass()); unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE); @@ -3185,38 +3236,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING else { - // change our GT_LDOBJ into a GT_IND of the correct type - switch (originalSize) - { - case 1: - structBaseType = TYP_BYTE; - break; - case 2: - structBaseType = TYP_SHORT; - break; - -#if TARGET_POINTER_SIZE==8 - case 4: - structBaseType = TYP_INT; - break; -#endif - case TARGET_POINTER_SIZE: - - BYTE gcPtr; - info.compCompHnd->getClassGClayout(argLdobj->gtLdObj.gtClass, &gcPtr); - - if (gcPtr == TYPE_GC_NONE) - structBaseType = TYP_I_IMPL; - else if (gcPtr == TYPE_GC_REF) - structBaseType = TYP_REF; - else if (gcPtr == TYPE_GC_BYREF) - structBaseType = TYP_BYREF; - break; - - default: - NO_WAY("Bad math"); - break; - } + // change our GT_LDOBJ into a GT_IND of the correct type. + structBaseType = argOrReturnTypeForStruct(originalSize, argLdobj->gtLdObj.gtClass, false /* forReturn */); + // We've already ensured above that size is a power of 2, and less than pointer size. 
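// ---------------------------------------------------------------------------------
// Editor's illustrative sketch (not part of this commit): the size-to-scalar mapping
// the surrounding code relies on when a small struct is reinterpreted as a primitive
// for argument passing. The enum and function names are invented; the real JIT also
// consults the GC layout for pointer-sized structs, and ARM64 additionally rounds the
// odd sizes (3, 5-7) up to the next integer type, both of which are elided here.
#include <cstddef>

enum class PassAs { Byte, Short, Int, Pointer, NotScalar };

static PassAs ScalarTypeForStructSize(size_t structSize, size_t pointerSize)
{
    switch (structSize)
    {
    case 1:  return PassAs::Byte;
    case 2:  return PassAs::Short;
    case 4:  return PassAs::Int;
    default:
        if (structSize == pointerSize)
        {
            // Passed as one pointer-sized scalar; whether it is an integer or a GC
            // reference depends on the struct's single field.
            return PassAs::Pointer;
        }
        // Larger structs are handled differently (by reference or in register pairs,
        // depending on the target ABI).
        return PassAs::NotScalar;
    }
}
// ---------------------------------------------------------------------------------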
+ noway_assert(structBaseType != TYP_UNKNOWN); argLdobj->ChangeOper(GT_IND); @@ -3455,7 +3478,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } } #else // !defined(UNIX_AMD64_ABI) - isRegArg = intArgRegNum < maxRegArgs; + isRegArg = (intArgRegNum+(size-1)) < maxRegArgs; #endif // !defined(UNIX_AMD64_ABI) #endif // _TARGET_ARM_ } @@ -3678,7 +3701,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) if (fltArgRegNum > MAX_FLOAT_REG_ARG) { // This indicates a partial enregistration of a struct type - assert(isStructArg); + assert(varTypeIsStruct(argx)); unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG); assert((unsigned char)numRegsPartial == numRegsPartial); call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); @@ -3735,10 +3758,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc)); } -#ifdef _TARGET_AMD64_ - +#ifndef LEGACY_BACKEND if (argx->gtOper == GT_MKREFANY) { + NYI_X86("MKREFANY"); + // 'Lower' the MKREFANY tree and insert it. noway_assert(!lateArgsComputed); @@ -3768,7 +3792,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) call->fgArgInfo->EvalToTmp(argIndex, tmp, asg); lvaSetVarAddrExposed(tmp); } -#endif // _TARGET_AMD64_ +#endif // !LEGACY_BACKEND argIndex++; #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING @@ -4123,9 +4147,9 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, if (lcl->OperIsLocal()) { unsigned varNum = lcl->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = &lvaTable[varNum]; - if (varDsc->lvIsParam && varDsc->lvIsTemp) + if (lvaIsImplicitByRefLocal(varNum)) { + LclVarDsc* varDsc = &lvaTable[varNum]; if (varDsc->lvRefCnt == 1 && !fgMightHaveLoop()) { varDsc->lvRefCnt = 0; @@ -4191,9 +4215,10 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, } // Create a reference to the temp - GenTreePtr dest = gtNewLclvNode(tmp, TYP_STRUCT); + GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType); dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction. 
- dest = gtNewOperNode(GT_ADDR, TYP_I_IMPL, dest); + dest = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); + lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this); GenTreePtr src; if (argx->gtOper == GT_LDOBJ) @@ -4203,7 +4228,7 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, else { argx->gtFlags |= GTF_DONT_CSE; - src = gtNewOperNode(GT_ADDR, TYP_I_IMPL, argx); + src = gtNewOperNode(GT_ADDR, TYP_BYREF, argx); } // Copy the valuetype to the temp @@ -4619,11 +4644,7 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree) else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT) { lenOffs = offsetof(CORINFO_RefArray, length); -#ifndef RYUJIT_CTPBUILD elemOffs = eeGetEEInfo()->offsetOfObjArrayData; -#else - elemOffs = offsetof(CORINFO_RefArray, refElems); -#endif } else // We have a standard array { @@ -5002,8 +5023,7 @@ GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree) #if LOCAL_ASSERTION_PROP /* Assertion prop can tell us to omit adding a cast here */ if (optLocalAssertionProp && - optAssertionIsSubrange(tree, varType, EXPSET_ALL) != NO_ASSERTION_INDEX) - + optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX) { return tree; } @@ -5063,6 +5083,13 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd; unsigned fldOffset = tree->gtField.gtFldOffset; GenTreePtr objRef = tree->gtField.gtFldObj; + bool fieldMayOverlap = false; + if (tree->gtField.gtFldMayOverlap) + { + fieldMayOverlap = true; + // Reset the flag because we may reuse the node. + tree->gtField.gtFldMayOverlap = false; + } #ifdef FEATURE_SIMD // if this field belongs to simd struct, tranlate it to simd instrinsic. @@ -5075,7 +5102,7 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma return newTree; } } - else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && varTypeIsSIMD(objRef->gtOp.gtOp1)) + else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && objRef->OperIsSIMD()) { // We have a field of an SIMD intrinsic in an address-taken context. // We need to copy the SIMD result to a temp, and take the field of that. @@ -5273,7 +5300,8 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma { // Generate the "addr" node. addr = objRef; - GetZeroOffsetFieldMap()->Set(addr, GetFieldSeqStore()->CreateSingleton(symHnd)); + FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + GetZeroOffsetFieldMap()->Set(addr, fieldSeq); } else { @@ -5300,13 +5328,14 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma { // Generate the "addr" node. /* Add the member offset to the object's address */ + FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, - GetFieldSeqStore()->CreateSingleton(symHnd))); + fieldSeq)); } // Now let's set the "tree" as a GT_IND tree. @@ -5316,7 +5345,7 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma if (fgAddrCouldBeNull(addr)) { - /* This indirection can cause a GPF if the address is could be null */ + // This indirection can cause a GPF if the address could be null. 
tree->gtFlags |= GTF_EXCEPT; } @@ -5418,8 +5447,8 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma if (fldOffset != 0) { - GenTreePtr fldOffsetNode = new(this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, GetFieldSeqStore()->CreateSingleton(symHnd) - ); + FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + GenTreePtr fldOffsetNode = new(this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq); /* Add the TLS static field offset to the address */ @@ -5448,15 +5477,15 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma if (pFldAddr == NULL) { #ifdef _TARGET_64BIT_ - - if (IMAGE_REL_BASED_REL32 != info.compCompHnd->getRelocTypeHint(fldAddr)) + if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr)) { // The address is not directly addressible, so force it into a // constant, so we handle it properly GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL); addr->gtType = TYP_I_IMPL; - addr->gtIntCon.gtFieldSeq = GetFieldSeqStore()->CreateSingleton(symHnd); + FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + addr->gtIntCon.gtFieldSeq = fieldSeq; tree->SetOper(GT_IND); tree->gtOp.gtOp1 = addr; @@ -5471,7 +5500,8 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE); tree->SetOper(GT_CLS_VAR); tree->gtClsVar.gtClsVarHnd = symHnd; - tree->gtClsVar.gtFieldSeq = GetFieldSeqStore()->CreateSingleton(symHnd); + FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + tree->gtClsVar.gtFieldSeq = fieldSeq; } return tree; @@ -5498,35 +5528,14 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma } noway_assert(tree->gtOper == GT_IND); -#ifdef FEATURE_SIMD - if (featureSIMD && tree->gtType == TYP_STRUCT) - { - CORINFO_CLASS_HANDLE fieldStructType = nullptr; - noway_assert(symHnd != nullptr); - (void) info.compCompHnd->getFieldType(symHnd, &fieldStructType); - if (fieldStructType != nullptr) - { - // If this is a SIMD type, this is the point at which we lose the type information, - // so we need to set the correct type on the GT_IND. - unsigned simdFieldSize = 0; - if (getBaseTypeAndSizeOfSIMDType(fieldStructType, &simdFieldSize) != TYP_UNKNOWN) - { - var_types simdType = getSIMDTypeForSize(simdFieldSize); - // This is the new type of the node. - tree->gtType = simdType; - } - - } - } -#endif // FEATURE_SIMD - GenTreePtr res = fgMorphSmpOp(tree); if (fldOffset == 0 && res->OperGet() == GT_IND) { GenTreePtr addr = res->gtOp.gtOp1; // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node. - fgAddFieldSeqForZeroOffset(addr, GetFieldSeqStore()->CreateSingleton(symHnd)); + FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + fgAddFieldSeqForZeroOffset(addr, fieldSeq); } return res; @@ -5536,14 +5545,14 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma /***************************************************************************** * Returns the inlined call - * Returns NULL if the call could not be inlined. + * Returns nullptr if the call could not be inlined. 
*/ GenTreePtr Compiler::fgMorphCallInline(GenTreePtr node) { GenTreeCall* call = node->AsCall(); - GenTreePtr ret = NULL; + GenTreePtr ret = nullptr; JitInlineResult result; const char * inlineFailReason; @@ -5559,12 +5568,10 @@ GenTreePtr Compiler::fgMorphCallInline(GenTreePtr node) goto InlineFailed; } - // Ignore tail-calls, GTF_CALL_M_TAILCALL is set in fgMorphCall - if (call->IsTailCall()) - { - inlineFailReason = "Tail call"; - goto InlineFailed; - } + // impMarkInlineCandidate() is expected not to mark tail prefixed calls + // and recursive tail calls as inline candidates. + noway_assert(!call->IsTailPrefixedCall()); + noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call)); /* If the caller's stack frame is marked, then we can't do any inlining. Period. Although we have checked this in impCanInline, it is possible that later IL instructions @@ -5695,7 +5702,7 @@ _exit: InlineFailed: result = JitInlineResult(INLINE_FAIL, call->gtCall.gtInlineCandidateInfo->ilCallerHandle, - call->gtCall.gtCallType == CT_USER_FUNC ? call->gtCall.gtCallMethHnd : NULL, + call->gtCall.gtCallType == CT_USER_FUNC ? call->gtCall.gtCallMethHnd : nullptr, inlineFailReason); goto _exit; } @@ -5768,7 +5775,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) assert(args->IsList()); GenTreePtr argx = args->gtOp.gtOp1; - if (argx->TypeGet() == TYP_STRUCT) + if (varTypeIsStruct(argx)) { // GT_LDOBJ may be a chile of a GT_COMMA. Skip over comma opers. while (argx->gtOper == GT_COMMA) @@ -5783,7 +5790,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) #ifdef _TARGET_AMD64_ unsigned typeSize = 0; - hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(TYP_STRUCT, argx->gtLdObj.gtClass, &typeSize); + hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(TYP_STRUCT, argx->gtLdObj.gtClass, nullptr, false); #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // On System V the args could be a 2 eightbyte struct that is passed in two registers. @@ -6063,13 +6070,40 @@ void Compiler::fgMorphTailCall(GenTreeCall* call) *pList = gtNewListNode(thisPtr, *pList); } + // Add the extra VSD parameter to arg list in case of VSD calls. + // Tail call arg copying thunk will move this extra VSD parameter + // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk() + // in Stublinkerx86.cpp for more details. + CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0); + if (call->IsVirtualStub()) + { + GenTreePtr stubAddrArg; + + flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG; + + if (call->gtCallType == CT_INDIRECT) + { + stubAddrArg = gtClone(call->gtCallAddr, true); + noway_assert(stubAddrArg != nullptr); + } + else + { + noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0); + + ssize_t addr = ssize_t(call->gtStubCallStubAddr); + stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR); + } + + // Push the stub address onto the list of arguments + call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs); + } + // Now inject a placeholder for the real call target that Lower phase will generate. 
GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL); call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); // Inject the pointer for the copy routine to be used for struct copying noway_assert(call->callSig != nullptr); - CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0); void * pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags); arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR); call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); @@ -6082,6 +6116,236 @@ void Compiler::fgMorphTailCall(GenTreeCall* call) } +//------------------------------------------------------------------------------ +// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop. +// +// +// Arguments: +// block - basic block ending with a recursive fast tail call +// recursiveTailCall - recursive tail call to transform +// +// Notes: +// The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop. + +void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall) +{ + assert(recursiveTailCall->IsTailCallConvertibleToLoop()); + GenTreePtr last = fgGetLastTopLevelStmt(block); + assert(recursiveTailCall == last->gtStmt.gtStmtExpr); + + // Transform recursive tail call into a loop. + + GenTreePtr earlyArgInsertionPoint = last; + IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx; + + // Hoist arg setup statement for the 'this' argument. + GenTreePtr thisArg = recursiveTailCall->gtCallObjp; + if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode()) + { + GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset); + fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt); + + } + + // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first; + // then the temps need to be assigned to the method parameters. This is done so that the caller + // parameters are not re-assigned before call arguments depending on them are evaluated. + // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of + // where the next temp or parameter assignment should be inserted. + + // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first + // while the second call argument (const 1) doesn't. + // Basic block before tail recursion elimination: + // ***** BB04, stmt 1 (top level) + // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013) + // [000033] --C - G------ - \--* call void RecursiveMethod + // [000030] ------------ | / --* const int - 1 + // [000031] ------------arg0 in rcx + --* +int + // [000029] ------------ | \--* lclVar int V00 arg1 + // [000032] ------------arg1 in rdx \--* const int 1 + // + // + // Basic block after tail recursion elimination : + // ***** BB04, stmt 1 (top level) + // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) + // [000030] ------------ | / --* const int - 1 + // [000031] ------------ | / --* +int + // [000029] ------------ | | \--* lclVar int V00 arg1 + // [000050] - A---------- \--* = int + // [000049] D------N---- \--* lclVar int V02 tmp0 + // + // ***** BB04, stmt 2 (top level) + // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? 
) + // [000052] ------------ | / --* lclVar int V02 tmp0 + // [000054] - A---------- \--* = int + // [000053] D------N---- \--* lclVar int V00 arg0 + + // ***** BB04, stmt 3 (top level) + // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) + // [000032] ------------ | / --* const int 1 + // [000057] - A---------- \--* = int + // [000056] D------N---- \--* lclVar int V01 arg1 + + GenTreePtr tmpAssignmentInsertionPoint = last; + GenTreePtr paramAssignmentInsertionPoint = last; + + // Process early args. They may contain both setup statements for late args and actual args. + int earlyArgIndex = 0; + for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; + earlyArgs != nullptr; + (earlyArgIndex++, earlyArgs = earlyArgs->Rest())) + { + GenTreePtr earlyArg = earlyArgs->Current(); + if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode()) + { + if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0) + { + // This is a setup node so we need to hoist it. + GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset); + fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt); + } + else + { + // This is an actual argument that needs to be assigned to the corresponding caller parameter. + fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex); + GenTreePtr paramAssignStmt = fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset, + tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); + if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr)) + { + // All temp assignments will happen before the first param assignment. + tmpAssignmentInsertionPoint = paramAssignStmt; + } + } + } + } + + // Process late args. + int lateArgIndex = 0; + for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; + lateArgs != nullptr; + (lateArgIndex++, lateArgs = lateArgs->Rest())) + { + // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter. + GenTreePtr lateArg = lateArgs->Current(); + fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex); + GenTreePtr paramAssignStmt = fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset, + tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); + + if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr)) + { + // All temp assignments will happen before the first param assignment. + tmpAssignmentInsertionPoint = paramAssignStmt; + } + } + + // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that + // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that + // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here. + if (!info.compIsStatic && (lvaArg0Var != info.compThisArg)) + { + var_types thisType = lvaTable[info.compThisArg].TypeGet(); + GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType); + GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType)); + GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset); + fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt); + } + + // Remove the call + fgRemoveStmt(block, last); + + // Set the loop edge. + block->bbJumpKind = BBJ_ALWAYS; + block->bbJumpDest = fgFirstBBisScratch() ? 
fgFirstBB->bbNext : fgFirstBB; + fgAddRefPred(block->bbJumpDest, block); + block->bbFlags &= ~BBF_HAS_JMP; +} + +//------------------------------------------------------------------------------ +// fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter. +// +// +// Arguments: +// arg - argument to assign +// argTabEntry - argument table entry corresponding to arg +// block --- basic block the call is in +// callILOffset - IL offset of the call +// tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary) +// paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted +// +// Return Value: +// parameter assignment statement if one was inserted; nullptr otherwise. + +GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg, + fgArgTabEntryPtr argTabEntry, + BasicBlock *block, + IL_OFFSETX callILOffset, + GenTreePtr tmpAssignmentInsertionPoint, + GenTreePtr paramAssignmentInsertionPoint) +{ + // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because + // some argument trees may reference parameters directly. + + GenTreePtr argInTemp = nullptr; + unsigned originalArgNum = argTabEntry->argNum; + bool needToAssignParameter = true; + + // TODO-CQ: enable calls with struct arguments passed in registers. + noway_assert(!varTypeIsStruct(arg->TypeGet())); + + if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) + { + // The argument is already assigned to a temp or is a const. + argInTemp = arg; + } + else if (arg->OperGet() == GT_LCL_VAR) + { + unsigned lclNum = arg->AsLclVar()->gtLclNum; + LclVarDsc * varDsc = &lvaTable[lclNum]; + if (!varDsc->lvIsParam) + { + // The argument is a non-parameter local so it doesn't need to be assigned to a temp. + argInTemp = arg; + } + else if (lclNum == originalArgNum) + { + // The argument is the same parameter local that we were about to assign so + // we can skip the assignment. + needToAssignParameter = false; + } + } + + // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve + // any caller parameters. Some common cases are handled above but we may be able to eliminate + // more temp assignments. + + GenTreePtr paramAssignStmt = nullptr; + if (needToAssignParameter) + { + if (argInTemp == nullptr) + { + // The argument is not assigned to a temp. We need to create a new temp and insert an assignment. + // TODO: we can avoid a temp assignment if we can prove that the argument tree + // doesn't involve any caller parameters. + unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp")); + GenTreePtr tempSrc = arg; + GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType); + GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc); + GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset); + fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt); + argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType); + } + + // Now assign the temp to the parameter. 
+ LclVarDsc *paramDsc = lvaTable + originalArgNum; + assert(paramDsc->lvIsParam); + GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType); + GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp); + paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset); + + fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt); + } + return paramAssignStmt; +} /***************************************************************************** * @@ -6093,13 +6357,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) if (call->CanTailCall()) { // It should either be an explicit (i.e. tail prefixed) or an implicit tail call - assert((!call->IsTailPrefixedCall() || call->IsImplicitTailCall()) || - (!call->IsImplicitTailCall() || call->IsTailPrefixedCall())); + assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall()); // It cannot be an inline candidate assert(!call->IsInlineCandidate()); const char * szFailReason = nullptr; + bool hasStructParam = false; if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) { szFailReason = "Might turn into an intrinsic"; @@ -6131,7 +6395,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) } #endif #if FEATURE_TAILCALL_OPT - else if (call->IsImplicitTailCall()) + else { // We are still not sure whether it can be a tail call. Because, when converting // a call to an implicit tail call, we must check that there are no locals with @@ -6160,22 +6424,38 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++) { - if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) - { - hasAddrExposedVars = true; - break; - } - if (varDsc->lvPromoted && varDsc->lvIsParam) + // If the method is marked as an explicit tail call we will skip the + // following three hazard checks. + // We still must check for any struct parameters and set 'hasStructParam' + // so that we won't transform the recursive tail call into a loop. + // + if (call->IsImplicitTailCall()) { - hasStructPromotedParam = true; - break; + if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) + { + hasAddrExposedVars = true; + break; + } + if (varDsc->lvPromoted && varDsc->lvIsParam) + { + hasStructPromotedParam = true; + break; + } + if (varDsc->lvPinned) + { + // A tail call removes the method from the stack, which means the pinning + // goes away for the callee. We can't allow that. + hasPinnedVars = true; + break; + } } - if (varDsc->lvPinned) + if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam) { - // A tail call removes the method from the stack, which means the pinning - // goes away for the callee. We can't allow that. - hasPinnedVars = true; - break; + hasStructParam = true; + // This prevents transforming a recursive tail call into a loop + // but doesn't prevent tail call optimization so we need to + // look at the rest of parameters. + continue; } } @@ -6226,16 +6506,28 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) canFastTailCall = fgCanFastTailCall(call); if (!canFastTailCall) { + // Implicit or opportunistic tail calls are always dispatched via fast tail call + // mechanism and never via tail call helper for perf. if (call->IsImplicitTailCall()) { szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp"; } #ifndef LEGACY_BACKEND - // Methods with non-standard args will have indirection cell or cookie param passed - // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before - // tail calling the target method. 
- else if (call->HasNonStandardArgs()) + else if (!call->IsVirtualStub() && call->HasNonStandardArgs()) { + // If we are here, it means that the call is an explicitly ".tail" prefixed and cannot be + // dispatched as a fast tail call. + + // Methods with non-standard args will have indirection cell or cookie param passed + // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before + // tail calling the target method and hence ".tail" prefix on such calls needs to be + // ignored. + // + // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls though require + // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper. + // This is done by by adding stubAddr as an additional arg before the original list of + // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk() + // in Stublinkerx86.cpp. szFailReason = "Method with non-standard args passed in callee trash register cannot be tail called via helper"; } #endif //LEGACY_BACKEND @@ -6275,15 +6567,37 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) goto NO_TAIL_CALL; } -#if FEATURE_TAILCALL_OPT_SHARED_RETURN - // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN, - // but if the call falls through to a ret, and we are doing a tailcall, change it here. - if (compCurBB->bbJumpKind != BBJ_RETURN) - compCurBB->bbJumpKind = BBJ_RETURN; +#if !FEATURE_TAILCALL_OPT_SHARED_RETURN + // We enable shared-ret tail call optimization for recursive calls even if + // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined. + if (gtIsRecursiveCall(call)) #endif + { + // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN, + // but if the call falls through to a ret, and we are doing a tailcall, change it here. + if (compCurBB->bbJumpKind != BBJ_RETURN) + compCurBB->bbJumpKind = BBJ_RETURN; + + } // Set this flag before calling fgMorphCall() to prevent inlining this call. - call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL; + call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL; + + bool fastTailCallToLoop = false; +#if FEATURE_TAILCALL_OPT + // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register + // or return type is a struct that can be passed in a register. + if (opts.compTailCallLoopOpt && + canFastTailCall && + gtIsRecursiveCall(call) && + !call->IsVirtual() && + !hasStructParam && + !varTypeIsStruct(call->TypeGet())) + { + call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP; + fastTailCallToLoop = true; + } +#endif // Do some target-specific transformations (before we process the args, etc.) // This is needed only for tail prefixed calls that cannot be dispatched as @@ -6302,11 +6616,26 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) // At this point, we are committed to do the tailcall. compTailCallUsed = true; + CorInfoTailCall tailCallResult; + + if (fastTailCallToLoop) + { + tailCallResult = TAILCALL_RECURSIVE; + } + else if (canFastTailCall) + { + tailCallResult = TAILCALL_OPTIMIZED; + } + else + { + tailCallResult = TAILCALL_HELPER; + } + // for non user funcs, we have no handles to report info.compCompHnd->reportTailCallDecision(nullptr, (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, isTailPrefixed, - canFastTailCall ? TAILCALL_OPTIMIZED : TAILCALL_HELPER, + tailCallResult, nullptr); // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID. 
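
As an illustration of the decision encoded in the hunks above, here is a minimal standalone C++ sketch (not code from this commit): every morphed tail call ends up reported via reportTailCallDecision as either a recursive fast tail call that will become a loop (GTF_CALL_M_TAILCALL_TO_LOOP), a fast tail call dispatched as epilog+jmp, or a dispatch through the tail call helper. The CallSummary struct and its field names are hypothetical stand-ins for the GenTreeCall/Compiler state the real code consults.

// Standalone sketch only; mirrors the three-way outcome shown in the diff above.
#include <cstdio>

enum class TailCallKind { Recursive, Optimized, Helper };

struct CallSummary            // hypothetical stand-in for the call/compiler state
{
    bool canFastTailCall;     // result of fgCanFastTailCall
    bool isRecursive;         // result of gtIsRecursiveCall
    bool isVirtual;           // call->IsVirtual()
    bool hasStructParam;      // caller has a struct parameter
    bool returnsStruct;       // varTypeIsStruct(call->TypeGet())
    bool loopOptEnabled;      // opts.compTailCallLoopOpt
};

TailCallKind ClassifyTailCall(const CallSummary& c)
{
    // A recursive call that can be fast tail called and involves no struct
    // parameters or struct return may be transformed into a loop.
    if (c.loopOptEnabled && c.canFastTailCall && c.isRecursive &&
        !c.isVirtual && !c.hasStructParam && !c.returnsStruct)
    {
        return TailCallKind::Recursive;   // TAILCALL_RECURSIVE
    }
    // Otherwise a fast tail call is dispatched as epilog+jmp ...
    if (c.canFastTailCall)
    {
        return TailCallKind::Optimized;   // TAILCALL_OPTIMIZED
    }
    // ... and everything else goes through the tail call helper.
    return TailCallKind::Helper;          // TAILCALL_HELPER
}

int main()
{
    CallSummary c{true, true, false, false, false, true};
    std::printf("kind=%d\n", static_cast<int>(ClassifyTailCall(c)));
    return 0;
}
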
@@ -6314,10 +6643,17 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) call->gtType = TYP_VOID; #ifdef DEBUG - if (verbose) { + if (verbose) + { printf("\nGTF_CALL_M_TAILCALL bit set for call "); printTreeID(call); printf("\n"); + if (fastTailCallToLoop) + { + printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call "); + printTreeID(call); + printf("\n"); + } } #endif @@ -6471,6 +6807,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) callType = TYP_INT; } #endif +#ifdef FEATURE_SIMD + // Return a dummy node, as the return is already removed. + if (varTypeIsSIMD(callType)) + { + callType = TYP_DOUBLE; + } +#endif result = gtNewZeroConNode(genActualType(callType)); result = fgMorphTree(result); } @@ -6727,18 +7070,6 @@ GenTreePtr Compiler::fgMorphConst(GenTreePtr tree) // guarantee slow performance for that block. Instead cache the return value // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf. -#if defined(RYUJIT_CTPBUILD) - if (compCurBB->bbJumpKind == BBJ_THROW && - info.compCompHnd->canEmbedModuleHandleForHelper(info.compScopeHnd)) - { - // For un-important blocks, we want to construct the string lazily - - GenTreeArgList *args = gtNewArgList(gtNewIconNode(tree->gtStrCon.gtSconCPX, TYP_INT), - gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd)); - tree = gtNewHelperCallNode(CORINFO_HELP_STRCNS, TYP_REF, 0, args); - return fgMorphTree(tree); - } -#else if (compCurBB->bbJumpKind == BBJ_THROW) { CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd); @@ -6762,7 +7093,6 @@ GenTreePtr Compiler::fgMorphConst(GenTreePtr tree) return fgMorphTree(tree); } } -#endif // defined(RYUJIT_CTPBUILD) assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd)); @@ -7019,7 +7349,7 @@ ONE_SIMPLE_ASG: // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid // concern, then we could compromise, and say that address-exposed + fields do not completely cover the memory // of the struct prevent field-wise assignments. Same situation exists for the "src" decision. - if (lclVarTree->TypeGet() == TYP_STRUCT && + if (varTypeIsStruct(lclVarTree) && (lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum))) { @@ -7095,7 +7425,7 @@ _DoneDest:; if (impIsAddressInLocal(src, &lclVarTree)) { unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum; - if (lclVarTree->TypeGet() == TYP_STRUCT && + if (varTypeIsStruct(lclVarTree) && (lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum))) { // Let fgMorphCopyBlock handle it. 
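
Several hunks in this change, including the one just above, replace exact '== TYP_STRUCT' comparisons with varTypeIsStruct(). The intent suggested by the surrounding FEATURE_SIMD code is that SIMD vector locals carry their own var_types values yet still need the struct code paths (promotion checks, block copies, and so on). The short sketch below only illustrates that idea with a hypothetical enum; it is not the real vartype definition from the JIT sources.

// Standalone sketch of the predicate's intent; the enum values are hypothetical.
#include <cstdio>

enum var_types_t { TYP_INT_T, TYP_FLOAT_T, TYP_STRUCT_T, TYP_SIMD8_T, TYP_SIMD16_T, TYP_SIMD32_T };

static bool IsSimdType(var_types_t t)
{
    return (t == TYP_SIMD8_T) || (t == TYP_SIMD16_T) || (t == TYP_SIMD32_T);
}

// Analogue of varTypeIsStruct: a "struct" check that also accepts SIMD vector types,
// which is why a plain '== TYP_STRUCT' comparison is no longer sufficient.
static bool IsStructType(var_types_t t)
{
    return (t == TYP_STRUCT_T) || IsSimdType(t);
}

int main()
{
    std::printf("%d %d %d\n", IsStructType(TYP_STRUCT_T), IsStructType(TYP_SIMD16_T), IsStructType(TYP_INT_T));
    return 0;
}
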
@@ -7260,7 +7590,7 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree) if (destLclVar->lvPromoted && blockWidthIsConst) { - noway_assert(destLclVar->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(destLclVar)); noway_assert(!opts.MinOpts()); if (destLclVar->lvAddrExposed & destLclVar->lvContainsHoles) { @@ -7319,7 +7649,7 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree) noway_assert(genActualType(initVal->gtType) == TYP_INT); // The dest must be of TYP_STRUCT - noway_assert(destLclVar->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(destLclVar)); // // Now, convert InitBlock to individual assignments @@ -7517,7 +7847,7 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) if (destLclVar->lvPromoted && blockWidthIsConst) { - noway_assert(destLclVar->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(destLclVar)); noway_assert(!opts.MinOpts()); if (blockWidth == destLclVar->lvExactSize) @@ -7545,7 +7875,7 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) if (srcLclVar->lvPromoted && blockWidthIsConst) { - noway_assert(srcLclVar->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(srcLclVar)); noway_assert(!opts.MinOpts()); if (blockWidth == srcLclVar->lvExactSize) @@ -7715,6 +8045,20 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) blockSize->gtFlags &= ~GTF_ICON_HDL_MASK; // Clear the GTF_ICON_CLASS_HDL flags } #endif + // Liveness doesn't consider copyblk arguments of simple types as being + // a use or def, so explicitly mark these variables as address-exposed. + if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar)) + { + JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum); + lvaTable[srcLclNum].lvAddrExposed = true; + } + + if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar)) + { + JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum); + lvaTable[destLclNum].lvAddrExposed = true; + } + goto _Done; } @@ -8180,16 +8524,16 @@ Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode) // if the struct is a SIMD struct. // // Arguments: -// tree - GentreePtr. This node will be checked to see this is a feild which belongs to a simd +// tree - GentreePtr. This node will be checked to see this is a field which belongs to a simd // struct used for simd intrinsic or not. // pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut // to simd lclvar's base type. -// indexOut - unsigend pointer, if the tree is used for simd intrinsic, we will set *indexOut +// indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut // equals to the index number of this field. // simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut // equals to the simd struct size which this tree belongs to. // ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore -// tUsedInSIMDIntrinsic check. +// the UsedInSIMDIntrinsic check. // // return value: // A GenTreePtr which points the simd lclvar tree belongs to. 
If the tree is not the simd @@ -8325,8 +8669,8 @@ GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree) GenTreePtr newStruct = gtClone(simdOp1Struct); - assert(newStruct != nullptr); - GenTreePtr simdTree = gtNewSIMDNode(TYP_STRUCT, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize); + assert((newStruct != nullptr) && (varTypeIsSIMD(newStruct))); + GenTreePtr simdTree = gtNewSIMDNode(newStruct->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize); GenTreePtr copyBlkDst = gtNewOperNode(GT_ADDR, TYP_BYREF, newStruct); tree = gtNewBlkOpNode(GT_COPYBLK, copyBlkDst, @@ -8370,10 +8714,10 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* ma bool isQmarkColon = false; #if LOCAL_ASSERTION_PROP - unsigned origAssertionCount = DUMMY_INIT(0); + AssertionIndex origAssertionCount = DUMMY_INIT(0); AssertionDsc * origAssertionTab = DUMMY_INIT(NULL); - unsigned thenAssertionCount = DUMMY_INIT(0); + AssertionIndex thenAssertionCount = DUMMY_INIT(0); AssertionDsc * thenAssertionTab = DUMMY_INIT(NULL); #endif @@ -8866,16 +9210,28 @@ NO_MUL_64RSLT: // type handles and instances of System.Type // If this invariant is ever broken, the optimization will need updating +#ifdef LEGACY_BACKEND if ( op1->gtOper == GT_CALL && - op2->gtOper == GT_CALL && - ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op1->gtCall.gtCallType == CT_HELPER)) && - ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op2->gtCall.gtCallType == CT_HELPER))) + op2->gtOper == GT_CALL && + ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op1->gtCall.gtCallType == CT_HELPER)) && + ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op2->gtCall.gtCallType == CT_HELPER))) +#else + if ((((op1->gtOper == GT_INTRINSIC) && (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) || + ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) && + (((op2->gtOper == GT_INTRINSIC) && (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) || + ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER)))) +#endif { GenTreePtr pGetClassFromHandle; GenTreePtr pGetType; +#ifdef LEGACY_BACKEND bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1); bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2); +#else + bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false; + bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false; +#endif // Optimize typeof(...) == typeof(...) 
// Typically this occurs in generic code that attempts a type switch @@ -8926,9 +9282,13 @@ NO_MUL_64RSLT: { pConstLiteral = pConstLiteral->gtOp.gtOp1; } +#ifdef LEGACY_BACKEND if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC && info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) == CORINFO_INTRINSIC_Object_GetType && +#else + if ((pGetType->gtOper == GT_INTRINSIC) && (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) && +#endif pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL) { @@ -8937,8 +9297,14 @@ NO_MUL_64RSLT: if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd)) { // Method Table tree +#ifdef LEGACY_BACKEND GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp); +#else + GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1); +#endif objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null + compCurBB->bbFlags |= BBF_HAS_VTABREF; + optMethodFlags |= OMF_HAS_VTABLEREF; // Method table constant GenTreePtr cnsMT = pGetClassFromHandleArgument; @@ -8962,8 +9328,8 @@ NO_MUL_64RSLT: break; #ifdef _TARGET_ARM_ - case GT_MATH: - if (tree->gtMath.gtMathFN == CORINFO_INTRINSIC_Round) + case GT_INTRINSIC: + if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) { switch (tree->TypeGet()) { @@ -9012,11 +9378,11 @@ NO_MUL_64RSLT: noway_assert(optLocalAssertionProp); if (optAssertionCount) { - noway_assert(optAssertionCount <= MAX_ASSERTION_CNT); // else ALLOCA() is a bad idea + noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); origAssertionTab = (AssertionDsc*) ALLOCA(tabSize); origAssertionCount = optAssertionCount; - memcpy(origAssertionTab, &optAssertionTabPrivate, tabSize); + memcpy(origAssertionTab, optAssertionTabPrivate, tabSize); } else { @@ -9110,11 +9476,11 @@ NO_MUL_64RSLT: noway_assert(optLocalAssertionProp); if (optAssertionCount) { - noway_assert(optAssertionCount <= MAX_ASSERTION_CNT); // else ALLOCA() is a bad idea + noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); thenAssertionTab = (AssertionDsc*) ALLOCA(tabSize); thenAssertionCount = optAssertionCount; - memcpy(thenAssertionTab, &optAssertionTabPrivate, tabSize); + memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize); } else { @@ -9130,7 +9496,11 @@ NO_MUL_64RSLT: * NOTE: Don't reset the exception flags on nodes that may throw */ noway_assert(tree->gtOper != GT_CALL); - tree->gtFlags &= ~GTF_CALL; + + if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId)) + { + tree->gtFlags &= ~GTF_CALL; + } if (!tree->OperMayThrow()) tree->gtFlags &= ~GTF_EXCEPT; @@ -9161,7 +9531,7 @@ NO_MUL_64RSLT: if (origAssertionCount) { size_t tabSize = origAssertionCount * sizeof(AssertionDsc); - memcpy(&optAssertionTabPrivate, origAssertionTab, tabSize); + memcpy(optAssertionTabPrivate, origAssertionTab, tabSize); optAssertionReset(origAssertionCount); } } @@ -9221,12 +9591,12 @@ NO_MUL_64RSLT: { size_t tabSize = optAssertionCount * sizeof(AssertionDsc); if ( (optAssertionCount != thenAssertionCount) || - (memcmp(thenAssertionTab, &optAssertionTabPrivate, tabSize) != 0) ) + (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0) ) { // Yes they are different so we have to find the merged set // Iterate over the copy asgn table 
removing any entries // that do not have an exact match in the thenAssertionTab - unsigned index = 1; + AssertionIndex index = 1; while (index <= optAssertionCount) { AssertionDsc* curAssertion = optGetAssertion(index); @@ -10254,7 +10624,7 @@ CM_ADD_OP: if (cns1->OperGet() == GT_CNS_INT) { - op2->gtIntCon.gtFieldSeq = + op2->gtIntCon.gtFieldSeq = GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq); } @@ -10479,7 +10849,7 @@ CM_ADD_OP: // We will try to optimize when we have a promoted struct promoted with a zero lvFldOffset if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0)) { - noway_assert(varDsc->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(varDsc)); // We will try to optimize when we have a single field struct that is being struct promoted if (varDsc->lvFieldCnt == 1) @@ -10676,21 +11046,10 @@ CM_ADD_OP: return temp; } - // If we have marked op1 as a CSE candidate, we cannot perform a commutative reordering - // The comment says that this is required for correctness, but we can't reorder these during the CSE phase! - // See System.Collections.Generic.GenericArraySortHelper`1[TimeSpan][System.TimeSpan]:SwapIfGreaterWithItems(ref,int,int) (MethodHash=870e4ffc) - // - if ((op1->OperGet() == GT_COMMA) && !optValnumCSE_phase) + // Only do this optimization when we are in the global optimizer. Doing this after value numbering + // could result in an invalid value number for the newly generated GT_IND node. + if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph) { - /* After fgGlobalMorph we will check for op1 as a GT_COMMA with an unconditional throw node */ - if (!fgGlobalMorph && fgIsCommaThrow(op1)) - { - // No need to push the GT_IND node into the comma. - // As it will get deleted instead. - // (see the code immediately after this switch stmt) - break; // out of this switch stmt - } - // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)). // TBD: this transformation is currently necessary for correctness -- it might // be good to analyze the failures that result if we don't do this, and fix them @@ -11450,7 +11809,7 @@ ASG_OP: op2 = tree->gtOp2; /* The target is used as well as being defined */ - if (op1->gtOper == GT_LCL_VAR) + if (op1->OperIsLocal()) op1->gtFlags |= GTF_VAR_USEASG; @@ -12494,7 +12853,7 @@ GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac { tree = newTree; /* newTree is non-Null if we propagated an assertion */ - newTree = optAssertionProp(EXPSET_ALL, tree, NULL); + newTree = optAssertionProp(apFull, tree, NULL); } noway_assert(tree != NULL); } @@ -12630,12 +12989,10 @@ void Compiler::fgKillDependentAssertions(unsigned lclNum if (varDsc->lvPromoted) { - noway_assert(varDsc->lvType == TYP_STRUCT); + noway_assert(varTypeIsStruct(varDsc)); // Kill the field locals. 
- for (unsigned i = varDsc->lvFieldLclStart; - i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; - ++i) + for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) { fgKillDependentAssertions(i DEBUGARG(tree)); } @@ -12645,16 +13002,14 @@ void Compiler::fgKillDependentAssertions(unsigned lclNum /* All dependent assertions are killed here */ - EXPSET_TP killed = varDsc->lvAssertionDep; + ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum)); if (killed) { - unsigned index = optAssertionCount; - EXPSET_TP mask = optGetAssertionBit(index); - + AssertionIndex index = optAssertionCount; while (killed && (index > 0)) { - if (killed & mask) + if (BitVecOps::IsMember(apTraits, killed, index - 1)) { #ifdef DEBUG AssertionDsc* curAssertion = optGetAssertion(index); @@ -12670,17 +13025,16 @@ void Compiler::fgKillDependentAssertions(unsigned lclNum } #endif // Remove this bit from the killed mask - killed &= ~mask; + BitVecOps::RemoveElemD(apTraits, killed, index - 1); optAssertionRemove(index); } index--; - mask >>= 1; } // killed mask should now be zero - noway_assert(killed == 0); + noway_assert(BitVecOps::IsEmpty(apTraits, killed)); } } #endif // LOCAL_ASSERTION_PROP @@ -13272,7 +13626,8 @@ void Compiler::fgMorphStmts(BasicBlock * block, continue; } #ifdef FEATURE_SIMD - if (stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT && + if (!opts.MinOpts() && + stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmt.gtStmtExpr->OperGet() == GT_ASG) { fgMorphCombineSIMDFieldAssignments(block, stmt); @@ -13317,11 +13672,12 @@ void Compiler::fgMorphStmts(BasicBlock * block, noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall()); noway_assert(stmt->gtNext == NULL); + GenTreeCall* call = morph->AsCall(); // Could either be // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp. - noway_assert((morph->AsCall()->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || - (morph->AsCall()->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP))); + noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || + (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP))); } else if (block != compCurBB) { @@ -13336,11 +13692,13 @@ void Compiler::fgMorphStmts(BasicBlock * block, noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall()); noway_assert(stmt->gtNext == NULL); + GenTreeCall* call = morph->AsCall(); + // Could either be // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp. 
- noway_assert((morph->AsCall()->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || - (morph->AsCall()->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP))); + noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || + (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP))); } #ifdef DEBUG @@ -13459,6 +13817,14 @@ void Compiler::fgMorphStmts(BasicBlock * block, noway_assert(fgExpandInline == false); +#if FEATURE_FASTTAILCALL + GenTreePtr recursiveTailCall = nullptr; + if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall)) + { + fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall()); + } +#endif + #ifdef DEBUG compCurBB = (BasicBlock*)DEAD_BEEF; #endif @@ -14626,7 +14992,6 @@ void Compiler::fgMorph() #ifdef DEBUG /* Now that locals have address-taken marked, we can safely apply stress. */ lvaStressLclFld(); - lvaStressFloatLcls(); fgStress64RsltMul(); #endif // DEBUG @@ -14715,122 +15080,140 @@ void Compiler::fgPromoteStructs() lvaStructPromotionInfo structPromotionInfo; - structPromotionInfo.typeHnd = 0; - structPromotionInfo.canPromote = false; - structPromotionInfo.requiresScratchVar = false; - for (unsigned lclNum = 0; - lclNum < startLvaCount; - lclNum++) + lclNum < startLvaCount; + lclNum++) { LclVarDsc* varDsc = &lvaTable[lclNum]; // Don't promote if we have reached the tracking limit. - if (lvaHaveManyLocals()) + if (lvaHaveManyLocals()) { JITDUMP("Stopped promoting struct fields, due to too many locals.\n"); break; } - -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - if (!varDsc->lvDontPromote) -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_STRUCTS + if (varDsc->lvDontPromote) { + JITDUMP("Skipping V%02u: marked as lvDontPromote.\n", lclNum); + continue; + } +#endif // FEATURE_MULTIREG_STRUCTS + #ifdef FEATURE_SIMD - if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic) - { - // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote - // its fields. Instead, we will attempt to enregister the entire struct. - // Note, however, that if the code below does not decide to promote this struct, - // we will still set lvRegStruct if its fields have not been accessed. - varDsc->lvRegStruct = true; - } - else + if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic) + { + // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote + // its fields. Instead, we will attempt to enregister the entire struct. + // Note, however, that if the code below does not decide to promote this struct, + // we will still set lvRegStruct if its fields have not been accessed. + varDsc->lvRegStruct = true; + } + else #endif // FEATURE_SIMD - if (varDsc->lvType == TYP_STRUCT) - { - lvaCanPromoteStructVar(lclNum, &structPromotionInfo); - if (structPromotionInfo.canPromote) - { - // We *can* promote; *should* we promote? - // We should only do so if promotion has potential savings. One source of savings - // is if a field of the struct is accessed, since this access will be turned into - // an access of the corresponding promoted field variable. 
Even if there are no - // field accesses, but only block-level operations on the whole struct, if the struct - // has only one or two fields, then doing those block operations field-wise is probably faster - // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86). - // So if no fields are accessed independently, and there are three or more fields, - // then do not promote. - if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed) - { - JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", - lclNum, structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed); - continue; - } + if (varTypeIsStruct(varDsc)) + { + lvaCanPromoteStructVar(lclNum, &structPromotionInfo); + if (structPromotionInfo.canPromote) + { + // We *can* promote; *should* we promote? + // We should only do so if promotion has potential savings. One source of savings + // is if a field of the struct is accessed, since this access will be turned into + // an access of the corresponding promoted field variable. Even if there are no + // field accesses, but only block-level operations on the whole struct, if the struct + // has only one or two fields, then doing those block operations field-wise is probably faster + // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86). + // So if no fields are accessed independently, and there are three or more fields, + // then do not promote. + if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed) + { + JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", + lclNum, structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed); + continue; + } -#ifdef _TARGET_AMD64_ - // on AMD don't promote structs with a single float field - // Promoting it would just cause us to shuffle it back and forth between int and float regs. - // On ARM this would be an HFA and passed/returned in float regs. - if (structPromotionInfo.fieldCnt==1 - && varTypeIsFloating(structPromotionInfo.fields[0].fldType)) +#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) + // TODO-PERF - Only do this when the LclVar is used in an argument context + // TODO-ARM64 - HFA support should also eliminate the need for this. 
+ // + // For now we currently don't promote structs with a single float field + // Promoting it can cause us to shuffle it back and forth between the int and + // the float regs when it is used as a argument, which is very expensive for XARCH + // + if (structPromotionInfo.fieldCnt==1 + && varTypeIsFloating(structPromotionInfo.fields[0].fldType)) + { + JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with single float field.\n", + lclNum, structPromotionInfo.fieldCnt); + continue; + } +#endif // _TARGET_AMD64_ || _TARGET_ARM64_ +#if FEATURE_MULTIREG_STRUCT_ARGS +#if defined(_TARGET_ARM64_) + // TODO-PERF - Only do this when the LclVar is used in an argument context + // + // For now we currently don't promote structs that can be passed in registers + // + unsigned structSize = lvaLclExactSize(lclNum); + if ((structSize > TARGET_POINTER_SIZE) && (structSize <= MAX_PASS_MULTIREG_BYTES)) + { + JITDUMP("Not promoting promotable struct local V%02u (size==%d): ", + lclNum, structSize); + continue; + } +#endif // _TARGET_ARM64_ +#endif // FEATURE_MULTIREG_STRUCT_ARGS + + if (varDsc->lvIsParam) + { + if (structPromotionInfo.fieldCnt != 1) { - JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with single float field.\n", - lclNum, structPromotionInfo.fieldCnt); + JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam are true and #fields = %d.\n", + lclNum, structPromotionInfo.fieldCnt); continue; } -#endif - if (varDsc->lvIsParam) - { - if (structPromotionInfo.fieldCnt != 1) - { - JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam are true and #fields = %d.\n", - lclNum, structPromotionInfo.fieldCnt); - continue; - } - } - // - // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of the the - // incoming register into the stack frame slot. - // In that case, we would like to avoid promortion. - // However we haven't yet computed the lvRefCnt values so we can't do that. - // + } + // + // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of the the + // incoming register into the stack frame slot. + // In that case, we would like to avoid promortion. + // However we haven't yet computed the lvRefCnt values so we can't do that. + // #if 0 - // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single - // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number). - static int structPromoVarNum = 0; - structPromoVarNum++; - if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi"))) + // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single + // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number). + static int structPromoVarNum = 0; + structPromoVarNum++; + if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi"))) #endif // 0 - { - // Promote the this struct local var. - lvaPromoteStructVar(lclNum, &structPromotionInfo); + { + // Promote the this struct local var. 
+ lvaPromoteStructVar(lclNum, &structPromotionInfo); #ifdef _TARGET_ARM_ - if (structPromotionInfo.requiresScratchVar) + if (structPromotionInfo.requiresScratchVar) + { + // Ensure that the scratch variable is allocated, in case we + // pass a promoted struct as an argument. + if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM) { - // Ensure that the scratch variable is allocated, in case we - // pass a promoted struct as an argument. - if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM) - { - lvaPromotedStructAssemblyScratchVar = - lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var.")); - lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL; - } + lvaPromotedStructAssemblyScratchVar = + lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var.")); + lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL; } -#endif // _TARGET_ARM_ } +#endif // _TARGET_ARM_ } + } #ifdef FEATURE_SIMD - else if (varDsc->lvSIMDType && !varDsc->lvFieldAccessed) - { - // Even if we have not used this in a SIMD intrinsic, if it is not being promoted, - // we will treat it as a reg struct. - varDsc->lvRegStruct = true; - } -#endif // FEATURE_SIMD + else if (varDsc->lvSIMDType && !varDsc->lvFieldAccessed) + { + // Even if we have not used this in a SIMD intrinsic, if it is not being promoted, + // we will treat it as a reg struct. + varDsc->lvRegStruct = true; } +#endif // FEATURE_SIMD } } } @@ -14856,7 +15239,7 @@ Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalk unsigned lclNum = obj->gtLclVarCommon.gtLclNum; LclVarDsc* varDsc = &lvaTable[lclNum]; - if (obj->gtType == TYP_STRUCT) + if (varTypeIsStruct(obj)) { if (varDsc->lvPromoted) { @@ -14954,7 +15337,7 @@ Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkD unsigned lclNum = tree->gtLclFld.gtLclNum; LclVarDsc* varDsc = &lvaTable[lclNum]; - if ((varDsc->TypeGet() == TYP_STRUCT) && (varDsc->lvPromoted)) + if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted)) { // Promoted struct unsigned fldOffset = tree->gtLclFld.gtLclOffs; @@ -15038,7 +15421,7 @@ void Compiler::fgMarkImplicitByRefArgs() assert(!varDsc->lvPromoted); // Called in the wrong order? - if (varDsc->lvIsParam && (varDsc->lvType == TYP_STRUCT)) + if (varDsc->lvIsParam && varTypeIsStruct(varDsc)) { size_t size; @@ -15109,14 +15492,25 @@ bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, fgWalkData* fgWalkPre) bool isAddr = (tree->gtOper == GT_ADDR); GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree; - LclVarDsc* lclVarDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum]; + unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum; + LclVarDsc* lclVarDsc = &lvaTable[lclNum]; - if (!lclVarDsc->lvIsParam || !lclVarDsc->lvIsTemp) + if (!lvaIsImplicitByRefLocal(lclNum)) { // We only need to tranform the 'marked' implicit by ref parameters return false; } + // The SIMD transformation to coalesce contiguous references to SIMD vector fields will + // re-invoke the traversal to mark address-taken locals. + // So, we may encounter a tree that has already been transformed to TYP_BYREF. + // If we do, leave it as-is. + if (!varTypeIsStruct(lclVarTree)) + { + assert(lclVarTree->TypeGet() == TYP_BYREF); + return false; + } + // We are overloading the lvRefCnt field here because real ref counts have not been set. 
lclVarDsc->lvRefCnt++; @@ -15137,8 +15531,10 @@ bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, fgWalkData* fgWalkPre) else { // Change X into *X + var_types structType = tree->gtType; lclVarTree = gtClone(tree); lclVarTree->gtType = TYP_BYREF; + tree->gtType = structType; tree->SetOper(GT_IND); tree->gtOp.gtOp1 = lclVarTree; // TODO-CQ: If the VM ever stops violating the ABI and passing heap references @@ -15149,6 +15545,7 @@ bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, fgWalkData* fgWalkPre) if (verbose) { printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n"); + gtDispTree(tree, nullptr, nullptr, true); fgWalkPre->printModified = true; } #endif // DEBUG @@ -15876,7 +16273,7 @@ bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr GenTreePtr tree = stmt->gtStmt.gtStmtExpr; assert(tree->OperGet() == GT_ASG); - GenTreePtr origianlLHS = tree->gtOp.gtOp1; + GenTreePtr originalLHS = tree->gtOp.gtOp1; GenTreePtr prevLHS = tree->gtOp.gtOp1; GenTreePtr prevRHS = tree->gtOp.gtOp2; unsigned index = 0; @@ -15946,14 +16343,33 @@ bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr fgRemoveStmt(block, stmt->gtNext); } - GenTreePtr copyBlkDst = createAddressNodeForSIMDInit(origianlLHS, simdSize); + GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); if (simdStructNode->OperIsLocal()) { setLclRelatedToSIMDIntrinsic(simdStructNode); } - if (copyBlkDst->gtOp.gtOp1->OperIsLocal()) + GenTree* copyBlkAddr = copyBlkDst; + if (copyBlkAddr->gtOper == GT_LEA) + { + copyBlkAddr = copyBlkAddr->AsAddrMode()->Base(); + } + GenTreeLclVarCommon* localDst = nullptr; + if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr)) + { + setLclRelatedToSIMDIntrinsic(localDst); + } + + GenTree* simdStructAddr; + if (simdStructNode->TypeGet() == TYP_BYREF) + { + assert(simdStructNode->OperIsLocal()); + assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum)); + simdStructAddr = simdStructNode; + } + else { - setLclRelatedToSIMDIntrinsic(copyBlkDst->gtOp.gtOp1); + assert(varTypeIsSIMD(simdStructNode)); + simdStructAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, simdStructNode); } #ifdef DEBUG @@ -15968,7 +16384,7 @@ bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr tree = gtNewBlkOpNode(GT_COPYBLK, copyBlkDst, - gtNewOperNode(GT_ADDR, TYP_BYREF, simdStructNode), + simdStructAddr, gtNewIconNode(simdSize), false); diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp index c188f4ac98..3c19dcb46d 100644 --- a/src/jit/optcse.cpp +++ b/src/jit/optcse.cpp @@ -376,17 +376,19 @@ int __cdecl Compiler::optCSEcostCmpEx(const void *op1, const void *op2) if (diff != 0) return diff; - diff = (int) (dsc2->csdDefWtCnt - dsc1->csdDefWtCnt); + // Sort the higher Use Counts toward the top + diff = (int) (dsc2->csdUseWtCnt - dsc1->csdUseWtCnt); if (diff != 0) return diff; - diff = (int) (dsc1->csdUseWtCnt - dsc2->csdUseWtCnt); + // With the same use count, Sort the lower Def Counts toward the top + diff = (int) (dsc1->csdDefWtCnt - dsc2->csdDefWtCnt); if (diff != 0) return diff; - // If order to ensure that we have a stable sort we use the cseIndex + // In order to ensure that we have a stable sort the lower csdIndex towards to the top return (int) (dsc1->csdIndex - dsc2->csdIndex); } @@ -404,7 +406,27 @@ int __cdecl Compiler::optCSEcostCmpSz(const void *op1, const void *op2) GenTreePtr exp1 = dsc1->csdTree; GenTreePtr exp2 = dsc2->csdTree; - return exp2->gtCostSz - 
exp1->gtCostSz; + int diff; + + diff = (int)(exp2->gtCostEx - exp1->gtCostEx); + + if (diff != 0) + return diff; + + // Sort the higher Use Counts toward the top + diff = (int)(dsc2->csdUseWtCnt - dsc1->csdUseWtCnt); + + if (diff != 0) + return diff; + + // With the same use count, Sort the lower Def Counts toward the top + diff = (int)(dsc1->csdDefWtCnt - dsc2->csdDefWtCnt); + + if (diff != 0) + return diff; + + // In order to ensure that we have a stable sort the lower csdIndex towards to the top + return (int)(dsc1->csdIndex - dsc2->csdIndex); } /*****************************************************************************/ @@ -763,7 +785,6 @@ class CSE_DataFlow { private: EXPSET_TP m_preMergeOut; - EXPSET_TP m_postMergeOut; Compiler* m_pCompiler; @@ -788,24 +809,11 @@ public: } // At the end of the merge store results of the dataflow equations, in a postmerge state. - void EndMerge(BasicBlock* block) - { - EXPSET_TP mergeOut = block->bbCseOut & (block->bbCseIn | block->bbCseGen); - m_postMergeOut = mergeOut; - } - - // Check if anything changed by comparing premerge and postmerge states. - bool Changed(BasicBlock* block) - { - bool changed = (m_postMergeOut != m_preMergeOut); - return changed; - } - - // Finish any updates to the basic blocks after the merge. - DataFlow::UpdateResult Update(BasicBlock* block) + bool EndMerge(BasicBlock* block) { - block->bbCseOut = m_postMergeOut; - return DataFlow::ContinueAnalysis; + EXPSET_TP mergeOut = block->bbCseOut & (block->bbCseIn | block->bbCseGen); + block->bbCseOut = mergeOut; + return (mergeOut != m_preMergeOut); } }; @@ -968,6 +976,10 @@ class CSE_Heuristic Compiler::codeOptimize codeOptKind; Compiler::CSEdsc** sortTab; size_t sortSiz; +#ifdef DEBUG + CLRRandom m_cseRNG; + unsigned m_bias; +#endif public: CSE_Heuristic(Compiler* pCompiler) @@ -1005,19 +1017,66 @@ public: #endif unsigned frameSize = 0; + unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1); unsigned lclNum; LclVarDsc * varDsc; for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; - lclNum++ , varDsc++) + lclNum++, varDsc++) { - frameSize += m_pCompiler->lvaLclSize(lclNum); + if (varDsc->lvRefCnt == 0) + continue; + + bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home + + // Some LclVars always have stack homes + if ((varDsc->lvDoNotEnregister) || (varDsc->lvType == TYP_LCLBLK)) + onStack = true; + +#ifdef _TARGET_X86_ + // Treat floating point and 64 bit integers as always on the stack + if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet())) + onStack = true; +#endif + + if (onStack) + { + frameSize += m_pCompiler->lvaLclSize(lclNum); + } + else + { + // For the purposes of estimating the frameSize we + // will consider this LclVar as being enregistered. + // Now we reduce the remaining regAvailEstimate by + // an appropriate amount. + if (varDsc->lvRefCnt <= 2) + { + // a single use single def LclVar only uses 1 + regAvailEstimate -= 1; + } + else + { + // a LclVar with multiple uses and defs uses 2 + if (regAvailEstimate >= 2) + { + regAvailEstimate -= 2; + } + else + { + // Don't try to subtract when regAvailEstimate is 1 + regAvailEstimate = 0; + } + } + } #ifdef _TARGET_XARCH_ - if (frameSize > 0x0A0) + if (frameSize > 0x080) { + // We likely have a large stack frame. 
+ // Thus we might need to use large displacements when loading or storing + // to CSE LclVars that are not enregistered largeFrame = true; - break; + break; // early out, we don't need to keep increasing frameSize } #else // _TARGET_ARM_ if (frameSize > 0x0400) @@ -1043,17 +1102,23 @@ public: if (!varTypeIsFloating(varTyp)) { + // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior. + // Note that this makes genTypeStSz return 1. + if (varTypeIsStruct(varTyp)) + { + varTyp = TYP_STRUCT; + } enregCount += genTypeStSz(varTyp); } - if ((aggressiveRefCnt == 0) && (enregCount >= CNT_CALLEE_ENREG)) + if ((aggressiveRefCnt == 0) && (enregCount > (CNT_CALLEE_ENREG*3/2))) { if (CodeOptKind() == Compiler::SMALL_CODE) - aggressiveRefCnt = varDsc->lvRefCnt+1; + aggressiveRefCnt = varDsc->lvRefCnt+BB_UNITY_WEIGHT; else - aggressiveRefCnt = varDsc->lvRefCntWtd+1; + aggressiveRefCnt = varDsc->lvRefCntWtd+BB_UNITY_WEIGHT; } - if ((moderateRefCnt == 0) && (enregCount >= CNT_CALLEE_ENREG*2)) + if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2)))) { if (CodeOptKind() == Compiler::SMALL_CODE) moderateRefCnt = varDsc->lvRefCnt; @@ -1061,8 +1126,15 @@ public: moderateRefCnt = varDsc->lvRefCntWtd; } } - aggressiveRefCnt = max(BB_UNITY_WEIGHT * 3, aggressiveRefCnt); - moderateRefCnt = max((BB_UNITY_WEIGHT * 3)/2, moderateRefCnt); + unsigned mult = 3; + // use smaller value for mult when enregCount is in [0..4] + if (enregCount <= 4) + { + mult = (enregCount <= 2) ? 1 : 2; + } + + aggressiveRefCnt = max(BB_UNITY_WEIGHT * mult, aggressiveRefCnt); + moderateRefCnt = max((BB_UNITY_WEIGHT * mult) / 2, moderateRefCnt); #ifdef DEBUG if (m_pCompiler->verbose) @@ -1070,6 +1142,7 @@ public: printf("\n"); printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt); printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt); + printf("Framesize estimate is 0x%04X\n", frameSize); printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small")); } #endif @@ -1139,6 +1212,7 @@ public: unsigned m_useCount; unsigned m_Cost; + unsigned m_Size; public: CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) @@ -1155,6 +1229,7 @@ public: // TODO-CQ: With ValNum CSE's the Expr and its cost can vary. GenTreePtr Expr() { return m_CseDsc->csdTree; } unsigned Cost() { return m_Cost; } + unsigned Size() { return m_Size; } bool LiveAcrossCall() { return (m_CseDsc->csdLiveAcrossCall != 0); } @@ -1162,19 +1237,98 @@ public: { if (m_context->CodeOptKind() == Compiler::SMALL_CODE) { - m_Cost = Expr()->gtCostSz; + m_Cost = Expr()->gtCostSz; // the estimated code size + m_Size = Expr()->gtCostSz; // always the gtCostSz m_defCount = m_CseDsc->csdDefCount; // def count m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs) } else { - m_Cost = Expr()->gtCostEx; + m_Cost = Expr()->gtCostEx; // the estimated execution cost + m_Size = Expr()->gtCostSz; // always the gtCostSz m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) } } }; +#ifdef DEBUG + //------------------------------------------------------------------------ + // optConfigBiasedCSE: + // Stress mode to shuffle the decision to CSE or not using environment + // variable COMPlus_JitStressBiasedCSE (= 0 to 100%). When the bias value + // is not specified but COMPlus_JitStress is ON, generate a random bias. 
+ // + // Return Value: + // 0 -- This method is indifferent about this CSE (no bias specified and no stress) + // 1 -- This CSE must be performed to maintain specified/generated bias. + // -1 -- This CSE mustn't be performed to maintain specified/generated bias. + // + // Operation: + // A debug stress only method that returns "1" with probability (P) + // defined by: + // + // P = (COMPlus_JitStressBiasedCSE / 100) (or) + // P = (random(100) / 100) when COMPlus_JitStress is specified and + // COMPlus_JitStressBiasedCSE is unspecified. + // + // When specified, the bias is reinterpreted as a decimal number between 0 + // to 100. + // When bias is not specified, a bias is randomly generated if COMPlus_JitStress + // is non-zero. + // + // Callers are supposed to call this method for each CSE promotion decision + // and ignore the call if return value is 0 and honor the 1 with a CSE and + // -1 with a no-CSE to maintain the specified/generated bias. + // + int optConfigBiasedCSE() + { + // Seed the PRNG, if never done before. + if (!m_cseRNG.IsInitialized()) + { + m_cseRNG.Init(m_pCompiler->info.compMethodHash()); + m_bias = m_cseRNG.Next(100); + } + + // Obtain the bias value and reinterpret as decimal. + static ConfigDWORD fJitStressBiasedCSE; + unsigned bias = ReinterpretHexAsDecimal( + fJitStressBiasedCSE.val(CLRConfig::INTERNAL_JitStressBiasedCSE)); + + // Invalid value, check if JitStress is ON. + if (bias > 100) + { + if (!m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT)) + { + // JitStress is OFF for CSE, nothing to do. + return 0; + } + bias = m_bias; + JITDUMP("JitStressBiasedCSE is OFF, but JitStress is ON: generated bias=%d.\n", bias); + } + + // Generate a number between (0, 99) and if the generated + // number is smaller than bias, then perform CSE. + unsigned gen = m_cseRNG.Next(100); + int ret = (gen < bias) ? 1 : -1; + + if (m_pCompiler->verbose) + { + if (ret < 0) + { + printf("No CSE because gen=%d >= bias=%d\n", gen, bias); + } + else + { + printf("Promoting CSE because gen=%d < bias=%d\n", gen, bias); + } + } + + // Indicate whether to perform CSE or not. + return ret; + } +#endif + // Given a CSE candidate decide whether it passes or fails the profitablity heuristic // return true if we believe that it is profitable to promote this candidate to a CSE // @@ -1183,6 +1337,13 @@ public: bool result = false; #ifdef DEBUG + int stressResult = optConfigBiasedCSE(); + if (stressResult != 0) + { + // Stress is enabled. Check whether to perform CSE or not. 
+ return (stressResult > 0); + } + if (m_pCompiler->optConfigDisableCSE2()) { return false; // skip this CSE @@ -1238,8 +1399,12 @@ public: unsigned no_cse_cost = 0; unsigned yes_cse_cost = 0; + unsigned extra_yes_cost = 0; + unsigned extra_no_cost = 0; - unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->DefCount(); + // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar + // Each CSE Def will contain two Refs and each CSE Use wil have one Ref of this new LclVar + unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount(); if (CodeOptKind() == Compiler::SMALL_CODE) { @@ -1324,43 +1489,58 @@ public: cse_def_cost = 1; cse_use_cost = 1; } - else if (candidate->LiveAcrossCall() == 0) - { -#ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("Aggressive CSE Promotion (CSE never live at call)\n"); - } -#endif - if (cseRefCnt >= moderateRefCnt) - cse_def_cost = 1; - else - cse_def_cost = (IND_COST_EX + 1) / 2; - cse_use_cost = 1; - } else if (cseRefCnt >= moderateRefCnt) { -#ifdef DEBUG - if (m_pCompiler->verbose) + + if (candidate->LiveAcrossCall() == 0) { - printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt); +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n", cseRefCnt, moderateRefCnt); + } +#endif + cse_def_cost = 2; + cse_use_cost = 1; } + else // candidate is live across call + { +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt); + } #endif - cse_def_cost = (IND_COST_EX + 1) / 2; - cse_use_cost = (IND_COST_EX + 1) / 2; - yes_cse_cost = 2; // We might have to spill/restore a caller saved register + cse_def_cost = 2; + cse_use_cost = 2; + extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller saved register + } } - else + else // Conservative CSE promotion { -#ifdef DEBUG - if (m_pCompiler->verbose) + if (candidate->LiveAcrossCall() == 0) { - printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt); +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("Conservative CSE Promotion (CSE never live at call) (%u < %u)\n", cseRefCnt, moderateRefCnt); + } +#endif + cse_def_cost = 2; + cse_use_cost = 2; } + else // candidate is live across call + { +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt); + } #endif - cse_def_cost = IND_COST_EX; - cse_use_cost = IND_COST_EX; - yes_cse_cost = 4; // We might have to spill/restore a caller saved register + cse_def_cost = 3; + cse_use_cost = 3; + extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller saved register + } // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED) @@ -1369,6 +1549,7 @@ public: cse_use_cost++; } } + if (largeFrame) { cse_def_cost++; @@ -1381,32 +1562,48 @@ public: } } + // estimate the cost from lost codesize reduction if we do not perform the CSE + if (candidate->Size() > cse_use_cost) + { + Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the weighted count + extra_no_cost = candidate->Size() - cse_use_cost; + extra_no_cost = extra_no_cost * dsc->csdUseCount * 2; + } + /* no_cse_cost is the cost estimate when we decide not to make a CSE */ /* yes_cse_cost is the cost estimate when we decide to make a CSE */ - 
no_cse_cost = candidate->UseCount() * candidate->Cost(); - yes_cse_cost += (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost); + no_cse_cost = candidate->UseCount() * candidate->Cost(); + yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost); #if CPU_LONG_USES_REGPAIR if (candidate->Expr()->TypeGet() == TYP_LONG) { - yes_cse_cost += (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost); + yes_cse_cost *= 2; } #endif + no_cse_cost += extra_no_cost; + yes_cse_cost += extra_yes_cost; #ifdef DEBUG if (m_pCompiler->verbose) { + printf("cseRefCnt=%d, aggressiveRefCnt=%d, moderateRefCnt=%d\n", cseRefCnt, aggressiveRefCnt, moderateRefCnt); + printf("defCnt=%d, useCnt=%d, cost=%d, size=%d\n", candidate->DefCount(), candidate->UseCount(), candidate->Cost(), candidate->Size()); + printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost, extra_no_cost, extra_yes_cost); + printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost, (no_cse_cost >= yes_cse_cost) ? "passes" : "fails"); } #endif - /* Does it cost us more to make this expression a CSE? */ + // Should we make this candidate into a CSE? + // Is the yes cost less than the no cost + // if (yes_cse_cost <= no_cse_cost) { - result = true; // YES_CSE + result = true; // Yes make this a CSE } else { @@ -1417,7 +1614,7 @@ public: if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage)) { - result = true; // YES_CSE + result = true; // Yes make this a CSE } } } @@ -1434,20 +1631,23 @@ public: { unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount(); - // As we introduce new LclVars for these CSE we slightly - // increase the cutoffs for aggressive and moderate CSE's - // if (successfulCandidate->LiveAcrossCall() != 0) { - int incr = 1; + // As we introduce new LclVars for these CSE we slightly + // increase the cutoffs for aggressive and moderate CSE's + // + int incr = BB_UNITY_WEIGHT; + #if CPU_LONG_USES_REGPAIR if (successfulCandidate->Expr()->TypeGet() == TYP_LONG) incr *= 2; #endif + if (cseRefCnt > aggressiveRefCnt) - aggressiveRefCnt += (2*incr); + aggressiveRefCnt += incr; + if (cseRefCnt > moderateRefCnt) - moderateRefCnt += incr; + moderateRefCnt += (incr/2); } /* Introduce a new temp for the CSE */ @@ -2040,8 +2240,8 @@ bool Compiler::optIsCSEcandidate(GenTreePtr tree) case GT_GT: return true; // Also CSE these Comparison Operators - case GT_MATH: - return true; // FP Instrinsics: Round, Sqrt, etc... + case GT_INTRINSIC: + return true; // Intrinsics case GT_COMMA: return true; // Allow GT_COMMA nodes to be CSE-ed. diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index f31e77d17a..ff22902e55 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -43,6 +43,7 @@ void Compiler::optInit() optIndirectCallCount = 0; optNativeCallCount = 0; optAssertionCount = 0; + optAssertionDep = NULL; #if FEATURE_ANYCSE optCSECandidateTotal = 0; optCSEstart = UINT_MAX; @@ -3316,13 +3317,13 @@ void Compiler::fgOptWhileLoop(BasicBlock * block) // Since test is a BBJ_COND it will have a bbNext noway_assert(bTest->bbNext); - // 'block' must be in the same try region as the condition, since we're going to insert - // a duplicated condition in 'block', and the condition might include exception throwing code. 
- if (!BasicBlock::sameTryRegion(block, bTest)) - return; + // 'block' must be in the same try region as the condition, since we're going to insert + // a duplicated condition in 'block', and the condition might include exception throwing code. + if (!BasicBlock::sameTryRegion(block, bTest)) + return; // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the - // same try region (or no try region) to avoid generating illegal flow. + // same try region (or no try region) to avoid generating illegal flow. BasicBlock* bTestNext = bTest->bbNext; if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext)) return; @@ -6565,7 +6566,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) if (lhs->OperGet() == GT_IND) { - GenTreePtr arg = lhs->gtOp.gtOp1; + GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true); FieldSeqNode* fldSeqArrElem = nullptr; if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) @@ -7601,494 +7602,6 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, Lo return WALK_CONTINUE; } -/***************************************************************************** - * - * Delete range checks in a loop if can prove that the index expression is - * in range. - */ - -void Compiler::optOptimizeInducIndexChecks(unsigned loopNum, arraySizes arrayDesc[]) -{ - noway_assert(loopNum < optLoopCount); - - JITDUMP("Considering loop %d for induction variable range checks.\n", loopNum); - - LoopDsc * loop = optLoopTable + loopNum; - - const unsigned LOOP_LEN_NOCONST = ((unsigned)-1); - unsigned constLoopBound = LOOP_LEN_NOCONST; - - /* Get the iterator variable */ - - if (!(loop->lpFlags & LPFLG_ITER)) - return; - - unsigned ivLclNum = loop->lpIterVar(); - - JITDUMP(" (possible) iter var is V%02u.\n", ivLclNum); - - if (lvaVarAddrExposed(ivLclNum)) - { - JITDUMP(" Rejected V%02u as iter var because is address-exposed.\n", ivLclNum); - return; - } - - - /* Any non-negative constant is a good initial value */ - - if (!(loop->lpFlags & LPFLG_CONST_INIT)) - { - JITDUMP(" Rejected V%02u as iter var because of non-constant init.\n", ivLclNum); - return; - } - - int posBias = loop->lpConstInit; - - if (posBias < 0) - { - JITDUMP(" Rejected V%02u as iter var because of negative init.\n", ivLclNum); - return; - } - - /* We require the loop to add by exactly one. - The reason for this requirement is that we don't have any guarantees - on the length of the array. For example, if the array length is 0x7fffffff - and the increment is by 2, then it's possible for the loop test to overflow - to 0x80000000 and hence succeed. In that case it's not legal for us to - remove the bounds check, because it is expected to fail. - It is possible to relax this constraint to allow for iterators <= i if we - can guarantee that arrays will have no more than 0x7fffffff-i+1 elements. - This requires corresponding logic in the EE that can get out of date with - the codes here. We will consider implementing that if this constraint - causes performance issues. */ - - if ((loop->lpIterOper() != GT_ASG_ADD) || (loop->lpIterConst() != 1)) - { - JITDUMP(" Rejected V%02u as iter var because of stride different from 1.\n", ivLclNum); - return; - } - - BasicBlock * head = loop->lpHead; - BasicBlock * end = loop->lpBottom; - BasicBlock * beg = head->bbNext; - - /* Find the loop termination test. 
if can't, give up */ - - if (end->bbJumpKind != BBJ_COND) - { - JITDUMP(" Gave up on loop: couldn't find termination test.\n"); - return; - } - - /* conditional branch must go back to top of loop */ - if (end->bbJumpDest != beg) - { - JITDUMP(" Gave up on loop: couldn't find termination test.\n"); - return; - } - - GenTreePtr conds = optFindLoopTermTest(end); - - if (conds == NULL) - { - JITDUMP(" Gave up on loop: couldn't find loop condition.\n"); - return; - } - - /* Get to the condition node from the statement tree */ - - noway_assert(conds->gtOper == GT_STMT); - noway_assert(conds->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); - // pointer to the node containing the loop test - noway_assert(conds->gtStmt.gtStmtExpr->gtOp.gtOp1 == loop->lpTestTree); - - /* if test isn't less than or less than equal, forget it */ - if (loop->lpTestOper() != GT_LT && loop->lpTestOper() != GT_LE) - { - JITDUMP(" Gave up on loop: loop condition in wrong form.\n"); - return; - } - -#ifdef DEBUG - GenTreePtr op1 = loop->lpIterator(); - noway_assert((op1->gtOper==GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum==ivLclNum)); -#endif - - /* op2 is the loop termination test */ - GenTreePtr op2 = loop->lpLimit(); - - /* Is second operand a region constant? We could allow some expressions - * here like "arr_length - 1" - */ - GenTreePtr rc = op2; - int negBias = 0; - - bool isLessThanOrEqual = (loop->lpTestOper() == GT_LE); - -AGAIN: - switch (rc->gtOper) - { - size_t ival; - case GT_ADD: - /* we allow length + negconst */ - if (rc->gtOp.gtOp2->IsCnsIntOrI() - && rc->gtOp.gtOp2->gtIntCon.gtIconVal < 0 - && rc->gtOp.gtOp1->gtOper == GT_ARR_LENGTH) - { - // If less than or equal make sure the negative constant is at least 1 - ssize_t ivalTemp = -rc->gtOp.gtOp2->gtIntCon.gtIconVal; - if ((ivalTemp < 0x7fffffff) && (!isLessThanOrEqual || ivalTemp > 0)) - { - negBias = (int)ivalTemp; - op2 = rc = rc->gtOp.gtOp1; - goto AGAIN; - } - } - break; - - case GT_SUB: - /* we allow length - posconst */ - if (rc->gtOp.gtOp2->IsCnsIntOrI() - && (size_t)rc->gtOp.gtOp2->gtIntCon.gtIconVal < 0x7fffffff - && (!isLessThanOrEqual || (size_t)rc->gtOp.gtOp2->gtIntCon.gtIconVal > 0) - && rc->gtOp.gtOp1->gtOper == GT_ARR_LENGTH) - { - negBias = (int)rc->gtOp.gtOp2->gtIntCon.gtIconVal; - op2 = rc = rc->gtOp.gtOp1; - goto AGAIN; - } - break; - - case GT_ARR_LENGTH: - /* recurse to check if operand is RC */ - rc = rc->gtOp.gtOp1; - goto AGAIN; - - case GT_LCL_VAR: - LclVarDsc * varDscRc; - varDscRc = optIsTrackedLocal(rc); - - /* if variable not tracked, quit */ - if (!varDscRc) - { - JITDUMP(" Gave up on loop: can't prove loop condition constant (depends on untracked var).\n"); - return; - } - - // If address-exposed, quit. - if (varDscRc->lvAddrExposed) - { - JITDUMP(" Gave up on loop: can't prove loop condition constant (depends on address-exposed var).\n"); - return; - } - - /* if altered, then quit */ - if (optIsVarAssgLoop(loopNum, rc->gtLclVarCommon.gtLclNum)) - { - JITDUMP(" Gave up on loop: can't prove loop condition constant (depends on var modified in loop).\n"); - return; - } - - break; - - case GT_CNS_INT: - /* save the const loop limit */ - ival = rc->gtIntCon.gtIconVal; - // If less than or equal make sure the negative constant is at most max - 1. 
- if (ival < 0x7fffffff && (!isLessThanOrEqual || ival < 0x7ffffffe)) - constLoopBound = (unsigned)rc->gtIntCon.gtIconVal; - break; - - default: - JITDUMP(" Gave up on loop: loop condition in unrecognized form.\n"); - return; - } - - if (op2->gtOper == GT_LCL_VAR) - { - VARSET_TP VARSET_INIT_NOCOPY(killedVars, VarSetOps::MakeEmpty(this)); - bool isLhsRhsKilled = false; - - GenTreePtr opInitFound = NULL; - BasicBlock* block = head; - int iterCount = 0; - while (block != NULL && opInitFound == NULL && !isLhsRhsKilled) - { - opInitFound = optFindLocalInit(block, op2, &killedVars, &isLhsRhsKilled); - flowList* preds = block->bbPreds; - // Exactly one predecessor. - block = (preds != NULL && preds->flNext == NULL) ? preds->flBlock : NULL; - if ((block == head) || (++iterCount >= 10)) - { - break; - } - } - op2 = opInitFound; - } - - unsigned arrayLclNum = 0; - -#if FANCY_ARRAY_OPT - GenTreePtr loopLim; - const unsigned CONST_ARRAY_LEN = ((unsigned)-1); - arrayLclNum = CONST_ARRAY_LEN -#endif - - /* only thing we want is array length (note we update op2 above - * to allow "arr_length - posconst") - */ - - if (constLoopBound == LOOP_LEN_NOCONST) - { - bool isArrLenVarNumFound = false; - if (op2) - { - switch (op2->gtOper) - { - // The default case (< or <= array.length) - case GT_ARR_LENGTH: - if (op2->gtArrLen.ArrRef()->gtOper == GT_LCL_VAR) - { - // Don't allow <= if there isn't any neg bias . - if (loop->lpTestOper() == GT_LT || (loop->lpTestOper() == GT_LE && negBias > 0)) - { - arrayLclNum = op2->gtArrLen.ArrRef()->gtLclVarCommon.gtLclNum; - isArrLenVarNumFound = true; - } - } - break; - - // Cases where we have (< or <= array.length + negative_K) or (< or <= array.length - positive_K) - case GT_SUB: - case GT_ADD: - { - GenTreePtr arrOpLen = op2->gtOp.gtOp1; - GenTreePtr arrOpConst = op2->gtOp.gtOp2; - - // Op1 should be arr length - if (arrOpLen->gtOper != GT_ARR_LENGTH || - arrOpLen->gtArrLen.ArrRef()->gtOper != GT_LCL_VAR || - !arrOpConst->IsCnsIntOrI()) - { - break; - } - - // If GT_ADD, negate the constant and turn it into positive value. - ssize_t ivalTemp = (op2->gtOper == GT_ADD) ? (-arrOpConst->gtIntCon.gtIconVal) - : arrOpConst->gtIntCon.gtIconVal; - // Cannot remove range check if: - // 1. A negative constant for sub and positive constant for add. - // 2. A value >= 0x7fffffff for constant. - // 3. If the loop condition is GT_LE, the constant value should be >= 1. i.e., arrLen + (-1). - if ((ivalTemp < 0) || (ivalTemp >= 0x7fffffff) || (isLessThanOrEqual && ivalTemp <= 0)) - { - break; - } - negBias = static_cast<int>(ivalTemp); - op2 = arrOpLen; - arrayLclNum = arrOpLen->gtArrLen.ArrRef()->gtLclVarCommon.gtLclNum; - isArrLenVarNumFound = true; - } - break; - - default: - JITDUMP(" Gave up on loop: loop condition in unrecognized form.\n"); - return; - } - } - - if (!isArrLenVarNumFound) - { -#if FANCY_ARRAY_OPT - loopLim = rc; -#else - JITDUMP(" Gave up on loop: loop condition in unrecognized form.\n"); - return; -#endif - } - -#if FANCY_ARRAY_OPT - if (arrayLclNum != CONST_ARRAY_LEN) -#endif - { - LclVarDsc * varDscArr = optIsTrackedLocal(op2->gtOp.gtOp1); - - /* If array local not tracked, quit. 
- If the array has been altered in the loop, forget it */ - - if (!varDscArr || - optIsVarAssgLoop(loopNum, (unsigned)(varDscArr - lvaTable))) - { -#if FANCY_ARRAY_OPT - arrayLclNum = CONST_ARRAY_LEN; -#else - JITDUMP(" Gave up on loop: loop condition array var not tracked or modified in loop.\n"); - return; -#endif - } - } - } - else - { - // If constLoopBound > all the array dimensions we know, we can't remove the range check. - bool isRemovable = false; - for (unsigned i = 0; i < MAX_ARRAYS; i++) - { - if (static_cast<int>(constLoopBound) <= arrayDesc[i].arrayDim) - { - isRemovable = true; - break; - } - } - if (!isRemovable) - { - JITDUMP(" Gave up on loop: constant loop bound not removable.\n"); - return; - } - } - - /* Now scan for range checks on the induction variable */ - - for (BasicBlock * block = beg; /**/; block = block->bbNext) - { -#if FEATURE_ANYCSE - /* Make sure we update the weighted ref count correctly */ - optCSEweight = block->getBBWeight(this); -#endif // FEATURE_ANYCSE - - /* Walk the statement trees in this basic block */ - -#if JIT_FEATURE_SSA_SKIP_DEFS - for (GenTreeStmt* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) -#else - for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) -#endif - { - for (GenTreePtr tree = stmt->gtStmtList; tree; tree = tree->gtNext) - { - /* no more can be done if we see the increment of induc var */ - if (tree->OperKind() & GTK_ASGOP) - { - if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR - && tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == ivLclNum) - return; - } - - // To facilitate removal of the range check, look for its parent, - // which should be GT_COMMA - if (tree->gtOper == GT_COMMA && tree->gtOp.gtOp1->OperGet() == GT_ARR_BOUNDS_CHECK) - { - int size; - int delta = 0; - - GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk(); - GenTreePtr arr = bndsChk->GetArray(); - if (arr == NULL) - { - continue; - } - - /* does the array ref match our known array */ - if (arr->gtOper != GT_LCL_VAR) - continue; - - if (constLoopBound != LOOP_LEN_NOCONST) - { - for (unsigned i = 0; i < MAX_ARRAYS; i++) - { - if (arr->gtLclVarCommon.gtLclNum == arrayDesc[i].arrayVar) - { - // retrieve array size - size = arrayDesc[i].arrayDim; - if (size >= (int)constLoopBound) - { - delta = size - (int)constLoopBound; - goto CHK_INDX; - } - } - } - - //does not match known arrays, so quit - continue; - } - - if (arr->gtLclVarCommon.gtLclNum != arrayLclNum) - { -#if FANCY_ARRAY_OPT - if (arrayLclNum == CONST_ARRAY_LEN) - { - LclVarDsc * arrayDsc; - - noway_assert(arr->gtLclVarCommon.gtLclNum < lvaCount); - arrayDsc = lvaTable + arr->gtLclVarCommon.gtLclNum; - - if (arrayDsc->lvKnownDim) - { - if (optIsNoMore(loopLim, arrayDsc->lvKnownDim, (loop->lpTestOper() == GT_LE))) - { - index = (*pnop)->gtOp.gtOp1; - - // TODO: Allow "i+1" and things like that - - goto RMV; - } - } - } -#endif - continue; - } - -CHK_INDX: - GenTreePtr index = bndsChk->gtIndex; - - /* allow sub of non-neg constant from induction variable - * if we had bigger initial value - */ - if (index->gtOper == GT_SUB - && index->gtOp.gtOp2->IsCnsIntOrI()) - { - ssize_t ival = index->gtOp.gtOp2->gtIntCon.gtIconVal; - if (ival >= 0 && ival <= posBias) - index = index->gtOp.gtOp1; - } - - /* allow add of constant to induction variable - * if we had a sub from length - */ - if (index->gtOper == GT_ADD - && index->gtOp.gtOp2->IsCnsIntOrI()) - { - // GT_ADD with -ve or +ve offsets - ssize_t ival = index->gtOp.gtOp2->gtIntCon.gtIconVal; - if ((ival >= 0 && ival <= 
negBias)|| (ival < 0 && -ival <= posBias) ||((ival >=0 )&& (ival <= delta))) - index = index->gtOp.gtOp1; - } - -#if FANCY_ARRAY_OPT -RMV: -#endif - /* is index our induction var? */ - if (!(index->gtOper == GT_LCL_VAR - && index->gtLclVarCommon.gtLclNum == ivLclNum)) - continue; - - /* no need for range check */ - optRemoveRangeCheck(tree, stmt, false); - -#if COUNT_RANGECHECKS - optRangeChkRmv++; -#endif - } - } - } - - if (block == end) - break; - } - -} - - struct optRangeCheckDsc { Compiler* pCompiler; @@ -8200,267 +7713,6 @@ bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree) return true; } -/***************************************************************************** - * - * Try to optimize away as many array index range checks as possible. - */ - -void Compiler::optOptimizeIndexChecks() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In optOptimizeIndexChecks()\n"); - if (verboseTrees) - { - printf("Blocks/trees before phase\n"); - fgDispBasicBlocks(true); - } - } -#endif - - /* - * we track MAX_ARRAYS arrays - * unsigned arrayVar; - * long arrayDim = DUMMY_INIT(~0); - */ - - arraySizes arrayDesc[MAX_ARRAYS]; - unsigned arrayIndx = 0; - - bool arrayInit = false; - -#if FANCY_ARRAY_OPT - LclVarDsc * arrayDsc; -#endif - - unsigned - const NO_ARR_VAR = (unsigned)-1; - - // initialize the array table - unsigned indx; - for (indx=0; indx<MAX_ARRAYS;indx++) - { - arrayDesc[indx].arrayVar = NO_ARR_VAR; - arrayDesc[indx].arrayDim = DUMMY_INIT(~0); - } - - /* Walk all the basic blocks in the function. We don't need to worry about - flow control as we already know if the array-local is lvAssignTwo or - not. Also, the local may be accessed before it is assigned the newarr - but we can still eliminate the range-check, as a null-ptr exception - will be caused as desired. */ - - for (BasicBlock * block = fgFirstBB; block; block = block->bbNext) - { - GenTreeStmt* stmt; - GenTreePtr tree; - - /* Ignore the block if it doesn't contain 'new' of an array */ - - if (!(block->bbFlags & BBF_HAS_NEWARRAY)) - continue; - - /* We have not noticed any array allocations yet */ - - //arrayVar = NO_ARR_VAR; - - /* Walk the statement trees in this basic block */ - -#if JIT_FEATURE_SSA_SKIP_DEFS - for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) -#else - for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) -#endif - { - for (tree = stmt->gtStmtList; tree; tree = tree->gtNext) - { - switch (tree->gtOper) - { - GenTreePtr op1; - GenTreePtr op2; - GenTreePtr arrayDimNode; - - case GT_ASG: - op1 = tree->gtOp.gtOp1; - op2 = tree->gtOp.gtOp2; - - - /* We are only interested in assignments to locals */ - if (op1->gtOper != GT_LCL_VAR) - break; - - // If the variable has its address taken, we cannot remove the range check - // - if (lvaTable[op1->gtLclVarCommon.gtLclNum].lvAddrExposed) - { - break; - } -#if ASSERTION_PROP - // If the variable has multiple defs we cannot remove the range check - // - if (!lvaTable[op1->gtLclVarCommon.gtLclNum].lvSingleDef) - { - break; - } -#endif - - /* Is this an assignment of 'new array' ? 
*/ - - if (op2->gtOper != GT_CALL || - op2->gtCall.gtCallType != CT_HELPER) - { - break; - } - - if (op2->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) && - op2->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) && - op2->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_VC) && - op2->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8)) - { - break; - } - - arrayDimNode = NULL; - - /* Extract the array dimension from the helper call */ - noway_assert(op2->gtOper == GT_CALL); - - fgArgTabEntryPtr arg1TabEntry; arg1TabEntry = gtArgEntryByArgNum(op2, 1); - arrayDimNode = arg1TabEntry->node; - - PREFAST_ASSUME(arrayDimNode != NULL); - - if (arrayDimNode->IsCnsIntOrI()) - { - /* We have a constant-sized array */ - arrayDesc[arrayIndx].arrayVar = op1->gtLclVarCommon.gtLclNum; - size_t arrayDim = arrayDimNode->gtIntCon.gtIconVal; - if (arrayDim > 0x7fffffff) - break; - arrayDesc[arrayIndx].arrayDim = (int)arrayDim; - arrayIndx = (arrayIndx + 1) % MAX_ARRAYS; - arrayInit = true; - } -#if FANCY_ARRAY_OPT - else - { - /* Make sure the value looks promising */ - - GenTreePtr tmp = arrayDimNode; - if (tmp->gtOper == GT_ADD && - tmp->gtOp.gtOp2->IsCnsIntOrI()) - tmp = tmp->gtOp.gtOp1; - - if (tmp->gtOper != GT_LCL_VAR) - break; - - noway_assert(tmp->gtLclVarCommon.gtLclNum < lvaCount); - arrayDsc = lvaTable + tmp->gtLclVarCommon.gtLclNum; - - if (arrayDsc->lvAssignTwo) - break; - if (arrayDsc->lvAssignOne && arrayDsc->lvIsParam) - break; - } - - /* Is there one assignment to the array? */ - - noway_assert(op1->gtLclVarCommon.gtLclNum < lvaCount); - arrayDsc = lvaTable + op1->gtLclVarCommon.gtLclNum; - - if (arrayDsc->lvAssignTwo) - break; - - /* Record the array size for later */ - - arrayDsc->lvKnownDim = arrayDimNode; -#endif - - break; - - - case GT_COMMA: -#if FANCY_ARRAY_OPT - // TODO-CQ: Figure out what to do for this - is FANCY_ARRAY_OPT ever enabled??? - if (op2 != nullptr && op2->OperGet() == GT_IND && (op2->gtFlags & GTF_IND_RNGCHK)) -#else - if ((tree->gtOp.gtOp1->OperGet() == GT_ARR_BOUNDS_CHECK) && arrayInit) -#endif - { - GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk(); - GenTreePtr arr = bndsChk->GetArray(); - - /* Is the address of the array a simple variable? */ - if (arr == NULL || arr->gtOper != GT_LCL_VAR) - break; - - GenTreePtr ind = bndsChk->gtIndex; - if (!ind->IsCnsIntOrI()) - break; - - /* Do we know the size of the array? */ - - int size = -1; - - for (indx=0; indx<MAX_ARRAYS;indx++) - { - if (arr->gtLclVarCommon.gtLclNum == arrayDesc[indx].arrayVar) - { - //noway_assert(arrayInit); - size = arrayDesc[indx].arrayDim; - break; - } - } - - if (size == -1) - { -#if FANCY_ARRAY_OPT - noway_assert(op1->gtLclVarCommon.gtLclNum < lvaCount); - arrayDsc = lvaTable + op1->gtLclVarCommon.gtLclNum; - - GenTreePtr dimx = arrayDsc->lvKnownDim; - if (!dimx) - break; - size = dimx->gtIntCon.gtIconVal; -#else - break; -#endif - } - - /* Is the index value within the correct range? */ - if (ind->gtIntConCommon.IconValue() < 0) - break; - if (ind->gtIntConCommon.IconValue() >= size) - break; - - /* no need for range check */ - - optRemoveRangeCheck(tree, stmt, true); - } - - break; - - default: - break; - } - } - } - } - - /* Optimize range checks on induction variables. 
*/ - - JITDUMP("Trying to eliminate induction variable range checks.\n"); - for (unsigned i=0; i < optLoopCount; i++) - { - /* Beware, some loops may be thrown away by unrolling or loop removal */ - - if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED)) - optOptimizeInducIndexChecks(i, arrayDesc); - } -} - - /****************************************************************************** * * Replace x==null with (x|x)==0 if x is a GC-type. diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp index 38eab6ae3e..99fa9cf150 100644 --- a/src/jit/rangecheck.cpp +++ b/src/jit/rangecheck.cpp @@ -80,15 +80,33 @@ bool RangeCheck::BetweenBounds(Range& range, int lower, GenTreePtr upper) } JITDUMP("\n"); #endif - // If the upper limit is not length, then bail. - if (!m_pCompiler->vnStore->IsVNArrLen(uLimitVN)) + + ValueNum arrRefVN = ValueNumStore::NoVN; + int arrSize = 0; + + if (m_pCompiler->vnStore->IsVNConstant(uLimitVN)) + { + ssize_t constVal = -1; + unsigned iconFlags = 0; + + if (m_pCompiler->optIsTreeKnownIntValue(true, upper, &constVal, &iconFlags)) + { + arrSize = (int)constVal; + } + } + else if (m_pCompiler->vnStore->IsVNArrLen(uLimitVN)) + { + // Get the array reference from the length. + arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(uLimitVN); + // Check if array size can be obtained. + arrSize = m_pCompiler->vnStore->GetNewArrSize(arrRefVN); + } + else { + // If the upper limit is not length, then bail. return false; } - // Get the array reference from the length. - ValueNum arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(uLimitVN); - #ifdef DEBUG JITDUMP("Array ref VN"); if (m_pCompiler->verbose) @@ -98,8 +116,6 @@ bool RangeCheck::BetweenBounds(Range& range, int lower, GenTreePtr upper) JITDUMP("\n"); #endif - // Check if array size can be obtained. - int arrSize = m_pCompiler->vnStore->GetNewArrSize(arrRefVN); JITDUMP("Array size is: %d\n", arrSize); // Upper limit: a.len + ucns (upper limit constant). @@ -125,9 +141,7 @@ bool RangeCheck::BetweenBounds(Range& range, int lower, GenTreePtr upper) } // Since upper limit is bounded by the array, return true if lower bound is good. - // TODO-CQ: I am retaining previous behavior to minimize ASM diffs, but we could - // return "true" here if GetConstant() >= 0 instead of "== 0". - if (range.LowerLimit().IsConstant() && range.LowerLimit().GetConstant() == 0) + if (range.LowerLimit().IsConstant() && range.LowerLimit().GetConstant() >= 0) { return true; } @@ -207,7 +221,23 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreeP // Take care of constant index first, like a[2], for example. ValueNum idxVn = treeIndex->gtVNPair.GetConservative(); ValueNum arrLenVn = bndsChk->gtArrLen->gtVNPair.GetConservative(); - int arrSize = GetArrLength(arrLenVn); + int arrSize = 0; + + if (m_pCompiler->vnStore->IsVNConstant(arrLenVn)) + { + ssize_t constVal = -1; + unsigned iconFlags = 0; + + if (m_pCompiler->optIsTreeKnownIntValue(true, bndsChk->gtArrLen, &constVal, &iconFlags)) + { + arrSize = (int)constVal; + } + } + else + { + arrSize = GetArrLength(arrLenVn); + } + JITDUMP("ArrSize for lengthVN:%03X = %d\n", arrLenVn, arrSize); if (m_pCompiler->vnStore->IsVNConstant(idxVn) && arrSize > 0) { @@ -458,9 +488,9 @@ void RangeCheck::SetDef(UINT64 hash, Location* loc) // Merge assertions on the edge flowing into the block about a variable. 
-void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, EXPSET_TP assertions, Range* pRange) +void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, const ASSERT_VALARG_TP assertions, Range* pRange) { - if (assertions == 0) + if (BitVecOps::IsEmpty(m_pCompiler->apTraits, assertions)) { return; } @@ -471,19 +501,18 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, EXPSET_TP assertions, Rang return; } // Walk through the "assertions" to check if the apply. - unsigned index = 1; - for (EXPSET_TP mask = 1; index <= m_pCompiler->GetAssertionCount(); index++, mask <<= 1) + BitVecOps::Iter iter(m_pCompiler->apTraits, assertions); + unsigned index = 0; + while (iter.NextElem(m_pCompiler->apTraits, &index)) { - if ((assertions & mask) == 0) - { - continue; - } + index++; - Compiler::AssertionDsc* curAssertion = m_pCompiler->optGetAssertion(index); + Compiler::AssertionDsc* curAssertion = m_pCompiler->optGetAssertion((Compiler::AssertionIndex)index); // Current assertion is about array length. if (!curAssertion->IsArrLenArithBound() && - !curAssertion->IsArrLenBound()) + !curAssertion->IsArrLenBound() && + !curAssertion->IsConstantBound()) { continue; } @@ -491,18 +520,22 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, EXPSET_TP assertions, Rang #ifdef DEBUG if (m_pCompiler->verbose) { - m_pCompiler->optPrintAssertion(curAssertion, index); + m_pCompiler->optPrintAssertion(curAssertion, (Compiler::AssertionIndex)index); } #endif assert(m_pCompiler->vnStore->IsVNArrLenArithBound(curAssertion->op1.vn) || - m_pCompiler->vnStore->IsVNArrLenBound(curAssertion->op1.vn)); + m_pCompiler->vnStore->IsVNArrLenBound(curAssertion->op1.vn) || + m_pCompiler->vnStore->IsVNConstantBound(curAssertion->op1.vn)); - ValueNumStore::ArrLenArithBoundInfo info; Limit limit(Limit::keUndef); + genTreeOps cmpOper = GT_NONE; + // Current assertion is of the form (i < a.len - cns) != 0 if (curAssertion->IsArrLenArithBound()) { + ValueNumStore::ArrLenArithBoundInfo info; + // Get i, a.len, cns and < as "info." m_pCompiler->vnStore->GetArrLenArithBoundInfo(curAssertion->op1.vn, &info); @@ -526,10 +559,14 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, EXPSET_TP assertions, Rang limit = Limit(Limit::keBinOpArray, info.vnArray, info.arrOper == GT_SUB ? -cons : cons); } } + + cmpOper = (genTreeOps)info.cmpOper; } // Current assertion is of the form (i < a.len) != 0 else if (curAssertion->IsArrLenBound()) { + ValueNumStore::ArrLenArithBoundInfo info; + // Get the info as "i", "<" and "a.len" m_pCompiler->vnStore->GetArrLenBoundInfo(curAssertion->op1.vn, &info); @@ -541,6 +578,26 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, EXPSET_TP assertions, Rang } limit.type = Limit::keArray; limit.vn = info.vnArray; + cmpOper = (genTreeOps)info.cmpOper; + } + // Current assertion is of the form (i < 100) != 0 + else if (curAssertion->IsConstantBound()) + { + ValueNumStore::ConstantBoundInfo info; + + // Get the info as "i", "<" and "100" + m_pCompiler->vnStore->GetConstantBoundInfo(curAssertion->op1.vn, &info); + + ValueNum lclVn = m_pCompiler->lvaTable[lcl->gtLclNum].GetPerSsaData(lcl->gtSsaNum)->m_vnPair.GetConservative(); + + // If we don't have the same variable we are comparing against, bail. 
+ if (lclVn != info.cmpOpVN) + { + continue; + } + + limit = Limit(Limit::keConstant, ValueNumStore::NoVN, info.constVal); + cmpOper = (genTreeOps)info.cmpOper; } else { @@ -558,21 +615,29 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, EXPSET_TP assertions, Rang continue; } #ifdef DEBUG - if (m_pCompiler->verbose) m_pCompiler->optPrintAssertion(curAssertion, index); + if (m_pCompiler->verbose) m_pCompiler->optPrintAssertion(curAssertion, (Compiler::AssertionIndex)index); #endif - noway_assert(limit.IsBinOpArray() || limit.IsArray()); + noway_assert(limit.IsBinOpArray() || limit.IsArray() || limit.IsConstant()); ValueNum arrLenVN = m_pCurBndsChk->gtArrLen->gtVNPair.GetConservative(); - ValueNum arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(arrLenVN); + ValueNum arrRefVN = ValueNumStore::NoVN; + + if (m_pCompiler->vnStore->IsVNArrLen(arrLenVN)) + { + // Get the array reference from the length. + arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(arrLenVN); + } // During assertion prop we add assertions of the form: // // (i < a.Length) == 0 // (i < a.Length) != 0 + // (i < 100) == 0 + // (i < 100) != 0 // - // At this point, we have detected that op1.vn is (i < a.Length) or (i < a.Length + cns), - // and the op2.vn is 0. + // At this point, we have detected that op1.vn is (i < a.Length) or (i < a.Length + cns) or + // (i < 100) and the op2.vn is 0. // // Now, let us check if we are == 0 (i.e., op1 assertion is false) or != 0 (op1 assertion // is true.), @@ -580,7 +645,6 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, EXPSET_TP assertions, Rang // If we have an assertion of the form == 0 (i.e., equals false), then reverse relop. // The relop has to be reversed because we have: (i < a.Length) is false which is the same // as (i >= a.Length). - genTreeOps cmpOper = (genTreeOps) info.cmpOper; if (curAssertion->assertionKind == Compiler::OAK_EQUAL) { cmpOper = GenTree::ReverseRelop(cmpOper); @@ -674,7 +738,7 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, EXPSET_TP assertions, Rang void RangeCheck::MergeAssertion(BasicBlock* block, GenTreePtr stmt, GenTreePtr op, SearchPath* path, Range* pRange DEBUGARG(int indent)) { JITDUMP("Merging assertions from pred edges of BB%02d for op(%p) $%03x\n", block->bbNum, dspPtr(op), op->gtVNPair.GetConservative()); - EXPSET_TP assertions = 0; + ASSERT_TP assertions = BitVecOps::UninitVal(); // If we have a phi arg, we can get to the block from it and use its assertion out. if (op->gtOper == GT_PHI_ARG) @@ -684,7 +748,7 @@ void RangeCheck::MergeAssertion(BasicBlock* block, GenTreePtr stmt, GenTreePtr o if (pred->bbFallsThrough() && pred->bbNext == block) { assertions = pred->bbAssertionOut; - JITDUMP("Merge assertions from pred BB%02d edge: %0I64X\n", pred->bbNum, assertions); + JITDUMP("Merge assertions from pred BB%02d edge: %s\n", pred->bbNum, BitVecOps::ToString(m_pCompiler->apTraits, assertions)); } else if ((pred->bbJumpKind == BBJ_COND || pred->bbJumpKind == BBJ_ALWAYS) && pred->bbJumpDest == block) { @@ -701,8 +765,11 @@ void RangeCheck::MergeAssertion(BasicBlock* block, GenTreePtr stmt, GenTreePtr o assertions = block->bbAssertionIn; } - // Perform the merge step to fine tune the range value. - MergeEdgeAssertions(op, assertions, pRange); + if (!BitVecOps::MayBeUninit(assertions)) + { + // Perform the merge step to fine tune the range value. 
+ MergeEdgeAssertions(op, assertions, pRange); + } } diff --git a/src/jit/rangecheck.h b/src/jit/rangecheck.h index f8038849af..9be73c51d3 100644 --- a/src/jit/rangecheck.h +++ b/src/jit/rangecheck.h @@ -430,7 +430,20 @@ struct RangeOps { result.lLimit = r1lo; } - + // Widen Upper Limit => Max(k, (a.len + n)) yields (a.len + n), + // This is correct if k >= 0 and n >= k, since a.len always >= 0 + // (a.len + n) could overflow, but the result (a.len + n) also + // preserves the overflow. + if (r1hi.IsConstant() && r1hi.GetConstant() >= 0 && + r2hi.IsBinOpArray() && r2hi.GetConstant() >= r1hi.GetConstant()) + { + result.uLimit = r2hi; + } + if (r2hi.IsConstant() && r2hi.GetConstant() >= 0 && + r1hi.IsBinOpArray() && r1hi.GetConstant() >= r2hi.GetConstant()) + { + result.uLimit = r1hi; + } if (r1hi.IsBinOpArray() && r2hi.IsBinOpArray() && r1hi.vn == r2hi.vn) { result.uLimit = r1hi; @@ -532,7 +545,7 @@ public: // Inspect the "assertions" and extract assertions about the given "phiArg" and // refine the "pRange" value. - void MergeEdgeAssertions(GenTreePtr phiArg, EXPSET_TP assertions, Range* pRange); + void MergeEdgeAssertions(GenTreePtr phiArg, const ASSERT_VALARG_TP assertions, Range* pRange); // The maximum possible value of the given "limit." If such a value could not be determined // return "false." For example: ARRLEN_MAX for array length. diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp index c2828ec0c8..b8085e6b71 100644 --- a/src/jit/rationalize.cpp +++ b/src/jit/rationalize.cpp @@ -867,8 +867,8 @@ void Rationalizer::MorphAsgIntoStoreLcl(GenTreeStmt* stmt, GenTreePtr pTree) GenTreeLclVarCommon* var = lhs->AsLclVarCommon(); pTree->SetOper(storeForm(var->OperGet())); GenTreeLclVarCommon* dst = pTree->AsLclVarCommon(); - dst->SetSsaNum(var->gtSsaNum); dst->SetLclNum(var->gtLclNum); + dst->SetSsaNum(var->gtSsaNum); dst->gtType = lhs->gtType; if (lhs->OperGet() == GT_LCL_FLD) @@ -943,24 +943,44 @@ void Compiler::fgFixupIfCallArg(ArrayStack<GenTree *> *parentStack, DBEXEC(VERBOSE, dumpTreeStack(GetTlsCompiler(), parentStack)); return; } - + // we have replaced an arg, so update pointers in argtable + fgFixupArgTabEntryPtr(parentCall, oldChild, newChild); +} + +//------------------------------------------------------------------------ +// fgFixupArgTabEntryPtr: Fixup the fgArgTabEntryPtr of parentCall after +// replacing oldArg with newArg +// +// Arguments: +// parentCall - a pointer to the parent call node +// oldArg - the original argument node +// newArg - the replacement argument node +// + +void Compiler::fgFixupArgTabEntryPtr(GenTreePtr parentCall, + GenTreePtr oldArg, + GenTreePtr newArg) +{ + assert(parentCall != nullptr); + assert(oldArg != nullptr); + assert(newArg != nullptr); + JITDUMP("parent call was :\n"); DISPTREE(parentCall); JITDUMP("old child was :\n"); - DISPTREE(oldChild); - - DBEXEC(VERBOSE, dumpTreeStack(GetTlsCompiler(), parentStack)); + DISPTREE(oldArg); - if (oldChild->gtFlags & GTF_LATE_ARG) + if (oldArg->gtFlags & GTF_LATE_ARG) { - newChild->gtFlags |= GTF_LATE_ARG; + newArg->gtFlags |= GTF_LATE_ARG; } else { - fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(parentCall, oldChild); - fp->node = newChild; + fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(parentCall, oldArg); + assert(fp->node == oldArg); + fp->node = newArg; } JITDUMP("parent call:\n"); @@ -1078,13 +1098,22 @@ Location Rationalizer::RewriteSimpleTransforms(Location loc) DISPTREE(statement); JITDUMP("\n"); - if (tree->OperGet() == GT_COMMA && statement->gtStmtIsTopLevel()) + if 
(statement->gtStmtIsTopLevel()) { - Location loc1, loc2; - RewriteTopLevelComma(loc, &loc1, &loc2); - RewriteSimpleTransforms(loc1); - RewriteSimpleTransforms(loc2); - return loc1; + if (tree->OperGet() == GT_COMMA) + { + Location loc1, loc2; + RewriteTopLevelComma(loc, &loc1, &loc2); + RewriteSimpleTransforms(loc1); + RewriteSimpleTransforms(loc2); + return loc1; + } + else if (tree->OperKind() & GTK_CONST) + { + // Don't bother generating a top level statement that is just a constant. + // We can get these if we decide to hoist a large constant value out of a loop. + tree->gtBashToNOP(); + } } SplitData tmpState = {0}; @@ -1436,7 +1465,7 @@ void Rationalizer::RewriteCopyBlk(GenTreePtr* ppTree, Compiler::fgWalkData* data // Src: Get rid of parent node of GT_ADDR(..) if its child happens to be of a SIMD type. GenTree* simdSrc = nullptr; - if (srcAddr->OperGet() == GT_ADDR && comp->isSIMDType(srcAddr->gtGetOp1())) + if (srcAddr->OperGet() == GT_ADDR && varTypeIsSIMD(srcAddr->gtGetOp1())) { comp->fgSnipInnerNode(srcAddr); simdSrc = srcAddr->gtGetOp1(); @@ -1583,12 +1612,124 @@ void Rationalizer::RewriteLdObj(GenTreePtr* ppTree, Compiler::fgWalkData* data) #endif } +// RewriteNodeAsCall : Replace the given tree node by a GT_CALL. +// +// Arguments: +// ppTree - A pointer-to-a-pointer for the tree node +// fgWalkData - A pointer to tree walk data providing the context +// callHnd - The method handle of the call to be generated +// args - The argument list of the call to be generated +// +// Return Value: +// None. +// + +void Rationalizer::RewriteNodeAsCall(GenTreePtr* ppTree, Compiler::fgWalkData* data, CORINFO_METHOD_HANDLE callHnd, GenTreeArgList* args) +{ + GenTreePtr tree = *ppTree; + Compiler* comp = data->compiler; + SplitData* tmpState = (SplitData *)data->pCallbackData; + GenTreePtr root = tmpState->root; + GenTreePtr treeFirstNode = comp->fgGetFirstNode(tree); + GenTreePtr treeLastNode = tree; + GenTreePtr treePrevNode = treeFirstNode->gtPrev; + GenTreePtr treeNextNode = treeLastNode->gtNext; + + // Create the call node + GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->gtType, args); + call = comp->fgMorphArgs(call); + call->CopyCosts(tree); + + // Replace "tree" with "call" + *ppTree = call; + + // Rebuild the evaluation order. + comp->gtSetStmtInfo(root); + + // Rebuild the execution order. + comp->fgSetTreeSeq(call, treePrevNode); + + // Restore linear-order Prev and Next for "call". + if (treePrevNode) + { + treeFirstNode = comp->fgGetFirstNode(call); + treeFirstNode->gtPrev = treePrevNode; + treePrevNode->gtNext = treeFirstNode; + } + else + { + // Update the linear oder start of "root" if treeFirstNode + // appears to have replaced the original first node. + assert(treeFirstNode == root->gtStmt.gtStmtList); + root->gtStmt.gtStmtList = comp->fgGetFirstNode(call); + } + + if (treeNextNode) + { + treeLastNode = call; + treeLastNode->gtNext = treeNextNode; + treeNextNode->gtPrev = treeLastNode; + } + + comp->fgFixupIfCallArg(data->parentStack, tree, call); + + // Propagate flags of "call" to its parents. + // 0 is current node, so start at 1 + for (int i = 1; i < data->parentStack->Height(); i++) + { + GenTree *node = data->parentStack->Index(i); + node->gtFlags |= GTF_CALL; + node->gtFlags |= call->gtFlags & GTF_ALL_EFFECT; + } + + // Since "tree" is replaced with "call", pop "tree" node (i.e the current node) + // and replace it with "call" on parent stack. 
+ assert(data->parentStack->Top() == tree); + (void)data->parentStack->Pop(); + data->parentStack->Push(call); + + DBEXEC(TRUE, ValidateStatement(root, tmpState->block)); +} + +// RewriteIntrinsicAsUserCall : Rewrite an intrinsic operator as a GT_CALL to the original method. +// +// Arguments: +// ppTree - A pointer-to-a-pointer for the intrinsic node +// fgWalkData - A pointer to tree walk data providing the context +// +// Return Value: +// None. +// +// Some intrinsics, such as operation Sqrt, are rewritten back to calls, and some are not. +// The ones that are not being rewritten here must be handled in Codegen. +// Conceptually, the lower is the right place to do the rewrite. Keeping it in rationalization is +// mainly for throughput issue. + +void Rationalizer::RewriteIntrinsicAsUserCall(GenTreePtr* ppTree, Compiler::fgWalkData* data) +{ + GenTreePtr tree = *ppTree; + Compiler* comp = data->compiler; + GenTreeArgList* args; + + assert(tree->OperGet() == GT_INTRINSIC); + + if (tree->gtOp.gtOp2 == nullptr) + { + args = comp->gtNewArgList(tree->gtOp.gtOp1); + } + else + { + args = comp->gtNewArgList(tree->gtOp.gtOp1, tree->gtOp.gtOp2); + } + + RewriteNodeAsCall(ppTree, data, tree->gtIntrinsic.gtMethodHandle, args); +} + // tree walker callback function that rewrites ASG and ADDR nodes Compiler::fgWalkResult Rationalizer::SimpleTransformHelper(GenTree **ppTree, Compiler::fgWalkData *data) { GenTree *tree = *ppTree; Compiler* comp = data->compiler; - SplitData *tmpState = (SplitData *) data->pCallbackData; while (tree->OperGet() == GT_COMMA) @@ -1616,32 +1757,8 @@ Compiler::fgWalkResult Rationalizer::SimpleTransformHelper(GenTree **ppTree, Com case GT_LCL_FLD: case GT_REG_VAR: case GT_PHI_ARG: - FixupIfSIMDLocal(comp, lhs->AsLclVarCommon()); MorphAsgIntoStoreLcl(tmpState->root->AsStmt(), tree); tree->gtFlags &= ~GTF_REVERSE_OPS; - -#if defined(FEATURE_SIMD) && defined(_TARGET_AMD64_) - // Vector2 parameter passing: A Vector2 struct is pointer size and as per - // ABI needs to be passed in an integer register. But at the same time - // a Vector2 is also considered TYP_DOUBLE by SIMD logic and hence will - // be allocated an XMM reg. Hence, passing Vector2 as a parameter to - // a method could result in either putarg_reg (post lowering) or st.loc - // where the target type is TYP_LONG and source type is TYP_DOUBLE. Similarly - // trying to return Vector2 value from a method will result in gt_return - // with mismatch in src and target types. LSRA already handles putarg_Reg and - // gt_return cases by introducing GT_COPY above the source value. The logic - // here is meant to handle st.loc case. - dataSrc = tree->gtGetOp1(); - if (tree->TypeGet() == TYP_I_IMPL && - dataSrc->TypeGet() == TYP_I_IMPL && - comp->isSIMDTypeLocal(dataSrc)) - { - // Introduce a GT_COPY above RHS - GenTreePtr newNode = comp->gtNewOperNode(GT_COPY, TYP_LONG, dataSrc); - tree->gtOp.gtOp1 = newNode; - dataSrc->InsertAfterSelf(newNode, tmpState->root->AsStmt()); - } -#endif // FEATURE_SIMD && _TARGET_AMD64_ break; case GT_IND: @@ -1669,6 +1786,12 @@ Compiler::fgWalkResult Rationalizer::SimpleTransformHelper(GenTree **ppTree, Com assert (store->gtPrev != nullptr); store->gtPrev->gtNext = store; + // Since "tree" is replaced with "store", pop "tree" node (i.e the current node) + // and replace it with "store" on parent stack. 
+ assert(data->parentStack->Top() == tree); + (void)data->parentStack->Pop(); + data->parentStack->Push(store); + JITDUMP("root:\n"); DISPTREE(tmpState->root); JITDUMP("\n"); @@ -1708,8 +1831,6 @@ Compiler::fgWalkResult Rationalizer::SimpleTransformHelper(GenTree **ppTree, Com GenTree *child = tree->gtOp.gtOp1; if (child->IsLocal()) { - FixupIfSIMDLocal(comp, child->AsLclVarCommon()); - // We are changing the child from GT_LCL_VAR TO GT_LCL_VAR_ADDR. // Therefore gtType of the child needs to be changed to a TYP_BYREF @@ -1813,9 +1934,16 @@ Compiler::fgWalkResult Rationalizer::SimpleTransformHelper(GenTree **ppTree, Com JITDUMP("\n"); } #endif // _TARGET_XARCH_ + else if ((tree->gtOper == GT_INTRINSIC) && + Compiler::IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId)) + { + RewriteIntrinsicAsUserCall(ppTree, data); + } #ifdef FEATURE_SIMD else - { + { + assert(tree->gtOper != GT_INTRINSIC || Compiler::IsTargetIntrinsic(tree->gtIntrinsic.gtIntrinsicId)); + // Transform the treeNode types for SIMD nodes. // If we have a SIMD type, set its size in simdSize, and later we will // set the actual type according to its size (which may be less than a full @@ -1865,7 +1993,7 @@ Compiler::fgWalkResult Rationalizer::SimpleTransformHelper(GenTree **ppTree, Com // This happens when it is consumed by a GT_RET_EXPR. // It can only be a Vector2f or Vector2i. assert(genTypeSize(simdTree->gtSIMDBaseType) == 4); - simdTree->gtType = TYP_DOUBLE; + simdTree->gtType = TYP_SIMD8; } else if (simdTree->gtType == TYP_STRUCT || varTypeIsSIMD(simdTree)) { @@ -1924,11 +2052,6 @@ Compiler::fgWalkResult Rationalizer::SimpleTransformHelper(GenTree **ppTree, Com } break; - - case GT_LCL_VAR: - case GT_STORE_LCL_VAR: - FixupIfSIMDLocal(comp, tree->AsLclVarCommon()); - break; } if ((*ppTree) != tree) { @@ -1966,17 +2089,22 @@ void Rationalizer::FixupIfSIMDLocal(Compiler* comp, GenTreeLclVarCommon* tree) // Note that struct args though marked as lvIsSIMD=true, // the tree node representing such an arg should not be // marked as a SIMD type, since it is a byref of a SIMD type. - if (!varDsc->lvSIMDType || tree->gtType == TYP_BYREF) + if (!varTypeIsSIMD(varDsc)) { return; } switch(tree->OperGet()) { case GT_LCL_FLD: - if (tree->AsLclFld()->gtFieldSeq == FieldSeqStore::NotAField() && tree->AsLclFld()->gtLclOffs == 0) + // We may see a lclFld used for pointer-sized structs that have been morphed, in which + // case we can change it to GT_LCL_VAR. + // However, we may also see a lclFld with FieldSeqStore::NotAField() for structs that can't + // be analyzed, e.g. those with overlapping fields such as the IL implementation of Vector<T>. + if ((tree->AsLclFld()->gtFieldSeq == FieldSeqStore::NotAField()) && + (tree->AsLclFld()->gtLclOffs == 0) && + (tree->gtType == TYP_I_IMPL) && + (varDsc->lvExactSize == TARGET_POINTER_SIZE)) { - // We will only see this for pointer-sized structs that have been morphed. - assert(tree->gtType == TYP_I_IMPL); tree->SetOper(GT_LCL_VAR); tree->gtFlags &= ~(GTF_VAR_USEASG); } @@ -1992,13 +2120,6 @@ void Rationalizer::FixupIfSIMDLocal(Compiler* comp, GenTreeLclVarCommon* tree) tree->SetOper(GT_STORE_LCL_VAR); tree->gtFlags &= ~(GTF_VAR_USEASG); break; - case GT_LCL_VAR: - case GT_STORE_LCL_VAR: - // This is either TYP_STRUCT or a SIMD type, or a 8 byte SIMD that has already been transformed. 
- assert(tree->gtType == TYP_STRUCT || - varTypeIsSIMD(tree->gtType) || - (varDsc->lvExactSize == 8 && tree->gtType == TYP_DOUBLE)); - break; } unsigned simdSize = (unsigned int) roundUp(varDsc->lvExactSize, TARGET_POINTER_SIZE); tree->gtType = comp->getSIMDTypeForSize(simdSize); diff --git a/src/jit/rationalize.h b/src/jit/rationalize.h index 6f2c1d8246..d893a2376c 100644 --- a/src/jit/rationalize.h +++ b/src/jit/rationalize.h @@ -172,6 +172,10 @@ private: static void RewriteLdObj(GenTreePtr* ppTree, Compiler::fgWalkData* data); static void RewriteCopyBlk(GenTreePtr* ppTree, Compiler::fgWalkData* data); static void RewriteInitBlk(GenTreePtr* ppTree, Compiler::fgWalkData* data); + + // Intrinsic related + static void RewriteNodeAsCall(GenTreePtr* ppTree, Compiler::fgWalkData* data, CORINFO_METHOD_HANDLE callHnd, GenTreeArgList* args); + static void RewriteIntrinsicAsUserCall(GenTreePtr* ppTree, Compiler::fgWalkData* data); }; inline Rationalizer::Rationalizer(Compiler* _comp) diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp index 89945301f0..78ca194ff3 100644 --- a/src/jit/regalloc.cpp +++ b/src/jit/regalloc.cpp @@ -676,6 +676,20 @@ regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *ar regState->rsCalleeRegArgMaskLiveIn |= genRegMask(inArgReg); +#if FEATURE_MULTIREG_STRUCT_ARGS +#ifdef _TARGET_ARM64_ + if (argDsc->lvOtherArgReg != REG_NA) + { + regNumber secondArgReg = argDsc->lvOtherArgReg; + + noway_assert(regState->rsIsFloat == false); + noway_assert(genRegMask(secondArgReg) & RBM_ARG_REGS); + + regState->rsCalleeRegArgMaskLiveIn |= genRegMask(secondArgReg); + } +#endif // TARGET_ARM64_ +#endif // FEATURE_MULTIREG_STRUCT_ARGS + #ifdef _TARGET_ARM_ if (argDsc->lvType == TYP_DOUBLE) { @@ -725,7 +739,8 @@ regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *ar } } } -#endif +#endif // _TARGET_ARM_ + return inArgReg; } @@ -2834,16 +2849,7 @@ ASG_COMMON: /* Casting from integral type to floating type is special */ if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet())) { - // If dblwasInt - if (!opts.compCanUseSSE2) - { - // if SSE2 is not enabled this can only be a DblWasInt case - assert(gtDblWasInt(op1)); - regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, lockedRegs); - tree->gtUsedRegs = (regMaskSmall)regMask; - goto RETURN_CHECK; - } - else + if (opts.compCanUseSSE2) { // predict for SSE2 based casting if (predictReg <= PREDICT_REG) @@ -2884,12 +2890,10 @@ ASG_COMMON: /* otherwise must load op1 into a register */ goto GENERIC_UNARY; -#if INLINE_MATH - - case GT_MATH: + case GT_INTRINSIC: #ifdef _TARGET_XARCH_ - if (tree->gtMath.gtMathFN==CORINFO_INTRINSIC_Round && + if (tree->gtIntrinsic.gtIntrinsicId==CORINFO_INTRINSIC_Round && tree->TypeGet()==TYP_INT) { // This is a special case to handle the following @@ -2908,7 +2912,6 @@ ASG_COMMON: } #endif __fallthrough; -#endif case GT_NEG: #ifdef _TARGET_ARM_ diff --git a/src/jit/registerfp.cpp b/src/jit/registerfp.cpp index b5a40b9d1f..7cbfc963f8 100644 --- a/src/jit/registerfp.cpp +++ b/src/jit/registerfp.cpp @@ -78,7 +78,7 @@ void CodeGen::genFloatConst(GenTree *tree, RegSet::RegisterPreference *pref) void CodeGen::genFloatMath(GenTree *tree, RegSet::RegisterPreference *pref) { - assert(tree->OperGet() == GT_MATH); + assert(tree->OperGet() == GT_INTRINSIC); GenTreePtr op1 = tree->gtOp.gtOp1; @@ -87,7 +87,7 @@ void CodeGen::genFloatMath(GenTree *tree, RegSet::RegisterPreference *pref) instruction ins; - switch (tree->gtMath.gtMathFN) + switch 
(tree->gtIntrinsic.gtIntrinsicId) { case CORINFO_INTRINSIC_Sin: ins = INS_invalid; @@ -215,7 +215,7 @@ void CodeGen::genFloatSimple(GenTree *tree, RegSet::RegisterPreference *pref) genFloatAsgArith(tree); break; } - case GT_MATH: + case GT_INTRINSIC: genFloatMath(tree, pref); break; diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp index 15e6eb7275..f167879283 100644 --- a/src/jit/regset.cpp +++ b/src/jit/regset.cpp @@ -147,6 +147,31 @@ void RegSet::rsRemoveRegsModified(regMaskTP mask) rsModifiedRegsMask &= ~mask; } +void RegSet::SetMaskVars(regMaskTP newMaskVars) +{ +#ifdef DEBUG + if (m_rsCompiler->verbose) + { + printf("\t\t\t\t\t\t\tLive regs: "); + if (_rsMaskVars == newMaskVars) + { + printf("(unchanged) "); + } + else + { + printRegMaskInt(_rsMaskVars); + m_rsCompiler->getEmitter()->emitDispRegSet(_rsMaskVars); + printf(" => "); + } + printRegMaskInt(newMaskVars); + m_rsCompiler->getEmitter()->emitDispRegSet(newMaskVars); + printf("\n"); + } +#endif // DEBUG + + _rsMaskVars = newMaskVars; +} + #ifdef DEBUG RegSet::rsStressRegsType RegSet::rsStressRegs() @@ -1440,10 +1465,7 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) tree->gtFlags &= ~GTF_SPILL; #endif // !LEGACY_BACKEND -#ifdef _TARGET_AMD64_ - assert(tree->InReg()); - assert(tree->gtRegNum == reg); -#else +#if CPU_LONG_USES_REGPAIR /* Are we spilling a part of a register pair? */ if (treeType == TYP_LONG) @@ -1457,7 +1479,10 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) assert(tree->gtFlags & GTF_REG_VAL); assert(tree->gtRegNum == reg); } -#endif // _TARGET_AMD64_ +#else + assert(tree->InReg()); + assert(tree->gtRegNum == reg); +#endif // CPU_LONG_USES_REGPAIR /* Are any registers free for spillage? */ diff --git a/src/jit/regset.h b/src/jit/regset.h index 985cee71d7..3b0ccae614 100644 --- a/src/jit/regset.h +++ b/src/jit/regset.h @@ -155,10 +155,36 @@ public: // TODO-Cleanup: Should be private, but GCInfo uses them #ifdef LEGACY_BACKEND regMaskTP rsMaskUsed; // currently 'used' registers mask #endif // LEGACY_BACKEND - regMaskTP rsMaskVars; // mask of registers currently allocated to variables + + __declspec(property(get=GetMaskVars,put=SetMaskVars)) + regMaskTP rsMaskVars; // mask of registers currently allocated to variables + + regMaskTP GetMaskVars() const // 'get' property function for rsMaskVars property + { + return _rsMaskVars; + } + + void SetMaskVars(regMaskTP newMaskVars); // 'put' property function for rsMaskVars property + + void AddMaskVars(regMaskTP addMaskVars) // union 'addMaskVars' with the rsMaskVars set + { + SetMaskVars(_rsMaskVars | addMaskVars); + } + + void RemoveMaskVars(regMaskTP removeMaskVars) // remove 'removeMaskVars' from the rsMaskVars set (like bitset DiffD) + { + SetMaskVars(_rsMaskVars & ~removeMaskVars); + } + + void ClearMaskVars() // Like SetMaskVars(RBM_NONE), but without any debug output. 
+ { + _rsMaskVars = RBM_NONE; + } private: + regMaskTP _rsMaskVars; // backing store for rsMaskVars property + #ifdef LEGACY_BACKEND regMaskTP rsMaskLock; // currently 'locked' registers mask regMaskTP rsMaskMult; // currently 'multiply used' registers mask diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp index d85664b7f7..c235dc44ea 100644 --- a/src/jit/scopeinfo.cpp +++ b/src/jit/scopeinfo.cpp @@ -1043,6 +1043,10 @@ void CodeGen::psiBegProlog() void CodeGen::psiAdjustStackLevel(unsigned size) { +#ifdef DEBUGGING_SUPPORT + if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0)) + return; + assert(compiler->compGeneratingProlog); #ifdef ACCURATE_PROLOG_DEBUG_INFO @@ -1069,6 +1073,7 @@ void CodeGen::psiAdjustStackLevel(unsigned size) } #endif // ACCURATE_PROLOG_DEBUG_INFO +#endif // DEBUGGING_SUPPORT } @@ -1082,6 +1087,10 @@ void CodeGen::psiAdjustStackLevel(unsigned size) void CodeGen::psiMoveESPtoEBP() { +#ifdef DEBUGGING_SUPPORT + if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0)) + return; + assert(compiler->compGeneratingProlog); assert(doubleAlignOrFramePointerUsed()); @@ -1109,6 +1118,7 @@ void CodeGen::psiMoveESPtoEBP() } #endif // ACCURATE_PROLOG_DEBUG_INFO +#endif // DEBUGGING_SUPPORT } @@ -1126,6 +1136,7 @@ void CodeGen::psiMoveToReg (unsigned varNum, regNumber reg, regNumber otherReg) { +#ifdef DEBUGGING_SUPPORT assert(compiler->compGeneratingProlog); if (!compiler->opts.compScopeInfo) @@ -1175,6 +1186,7 @@ void CodeGen::psiMoveToReg (unsigned varNum, !"Parameter scope not found (Assert doesnt always indicate error)"); #endif // ACCURATE_PROLOG_DEBUG_INFO +#endif // DEBUGGING_SUPPORT } @@ -1187,6 +1199,10 @@ void CodeGen::psiMoveToReg (unsigned varNum, void CodeGen::psiMoveToStack(unsigned varNum) { +#ifdef DEBUGGING_SUPPORT + if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0)) + return; + assert(compiler->compGeneratingProlog); assert( compiler->lvaTable[varNum].lvIsRegArg); assert(!compiler->lvaTable[varNum].lvRegister); @@ -1223,6 +1239,7 @@ void CodeGen::psiMoveToStack(unsigned varNum) !"Parameter scope not found (Assert doesnt always indicate error)"); #endif // ACCURATE_PROLOG_DEBUG_INFO +#endif // DEBUGGING_SUPPORT } /***************************************************************************** diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp index 7e7169cbf7..c90a70e34f 100644 --- a/src/jit/simd.cpp +++ b/src/jit/simd.cpp @@ -11,10 +11,7 @@ // This implementation is preliminary, and may change dramatically. // // New JIT types, TYP_SIMDxx, are introduced, and the SIMD intrinsics are created as GT_SIMD nodes. -// In this initial implementation, the SIMD types are kept as TYP_STRUCT, and only transformed to -// SIMD types in Lowering. Eventually, we will want to make SIMD types "first class" types. -// This will require ensuring that the JIT front-end can handle, and effectively optimize, the -// SIMD type nodes and lclVars. +// Nodes of SIMD types will be typed as TYP_SIMD* (e.g. TYP_SIMD8, TYP_SIMD16, etc.). // // Note that currently the "reference implementation" is the same as the runtime dll. As such, it is currently // providing implementations for those methods not currently supported by the JIT as intrinsics. 
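The regset.h hunk above replaces the raw rsMaskVars field with an MSVC __declspec(property) wrapper so that every update to the live-variable register mask funnels through SetMaskVars, which can trace mask transitions under DEBUG. A minimal, portable C++ sketch of that accessor pattern follows; the RegSet shape, the regMaskTP typedef and the printf-based trace are simplified stand-ins for illustration, not the JIT's real definitions.

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t regMaskTP;              // stand-in for the JIT's register-mask type
    static const regMaskTP RBM_NONE = 0;

    class RegSet
    {
    public:
        regMaskTP GetMaskVars() const { return _rsMaskVars; }

        // Single funnel point for updates, so transitions can be traced in DEBUG builds.
        void SetMaskVars(regMaskTP newMaskVars)
        {
    #ifdef DEBUG
            if (_rsMaskVars != newMaskVars)
            {
                printf("Live regs: %016llx => %016llx\n",
                       (unsigned long long)_rsMaskVars,
                       (unsigned long long)newMaskVars);
            }
    #endif
            _rsMaskVars = newMaskVars;
        }

        void AddMaskVars(regMaskTP add)    { SetMaskVars(_rsMaskVars | add);  } // union
        void RemoveMaskVars(regMaskTP rem) { SetMaskVars(_rsMaskVars & ~rem); } // set difference
        void ClearMaskVars()               { _rsMaskVars = RBM_NONE; }          // silent reset, no trace

    private:
        regMaskTP _rsMaskVars = RBM_NONE;    // backing store
    };

Under MSVC the __declspec(property) declaration lets existing call sites keep reading and writing rsMaskVars directly while still routing through these accessors; the Add/Remove/Clear helpers make the common mask edits explicit.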
@@ -108,64 +105,6 @@ int Compiler::getSIMDTypeAlignment(CORINFO_CLASS_HANDLE typeHnd) #endif } -#ifdef RYUJIT_CTPBUILD -// SIMDVector assembly -// static -volatile CORINFO_ASSEMBLY_HANDLE Compiler::SIMDAssemblyHandle = nullptr; - -//------------------------------------------------------------------------ -// isSIMDModule: If the SIMDVector assembly has previously been found, and its handle -// saved, check the moduleHnd to see if it is in the SIMDVector assembly. -// -// Arguments: -// moduleHnd - The handle of the module we're interested in. -// -// Return Value: -// Returns true if this module is in the SIMDVector assembly. -// -// Notes: -// This will only return true if this is a class constructor, and it is referencing -// a SIMD type, OR if we have already found the SIMDVector assembly. - -bool Compiler::isSIMDModule(CORINFO_MODULE_HANDLE moduleHnd) -{ - assert(featureSIMD); - const char* assemblyName; - if (SIMDAssemblyHandle == nullptr) - { - // For the CTP, we only check for the SIMD assembly while compiling a class constructor. - // This is because in the CTP we don't have VM support for identifying the SIMD assembly, - // and we don't want to check on each method. - if ((info.compFlags & CORINFO_FLG_CONSTRUCTOR) == 0 || (info.compFlags & CORINFO_FLG_STATIC) == 0) - { - return false; - } - CORINFO_ASSEMBLY_HANDLE assemblyHnd = info.compCompHnd->getModuleAssembly(moduleHnd); - assemblyName = info.compCompHnd->getAssemblyName(assemblyHnd); - if ((assemblyName != nullptr && strcmp(assemblyName, "System.Numerics.Vectors") == 0)) - { - SIMDAssemblyHandle = assemblyHnd; - JITDUMP("Found SIMDVector (System.Numerics.Vectors) assembly.\n"); - return true; - } - else - { - return false; - } - } - else - { - CORINFO_ASSEMBLY_HANDLE assemblyHnd = info.compCompHnd->getModuleAssembly(moduleHnd); - if (assemblyHnd == SIMDAssemblyHandle) - { - JITDUMP("\nKnown SIMDVector assembly.\n"); - return true; - } - } - - return false; -} -#endif // RYUJIT_CTPBUILD //---------------------------------------------------------------------------------- // Return the base type and size of SIMD vector type given its type handle. @@ -629,6 +568,10 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* i } } + if (varTypeIsSIMD(argType)) + { + argType = TYP_STRUCT; + } if (argType != expectedArgType) { found = false; @@ -681,60 +624,18 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* i return nullptr; } -//------------------------------------------------------------------------ -// checkForSIMDType: Check lclVar to see if it is a SIMD type. -// This calls getBaseTypeOfSIMDType to do most of the work. If it is a known -// SIMD type, then lvSIMDType will be set to true on 'varDsc', lvBaseType -// will be set to the base type (element type) and lvExactSize will be set to -// size of the SIMD struct supported on the target machine. -// -// Arguments: -// varDsc - The lclVar we are interested in. -// typeHnd - The handle of the type of the lclVar. -// -// Return Value: -// None. -// -void Compiler::checkForSIMDType(LclVarDsc* varDsc, - CORINFO_CLASS_HANDLE typeHnd) -{ - assert(featureSIMD); - - // We should be calling this once per struct. 
- assert (!varDsc->lvIsSIMDType()); - if (varDsc->lvType != TYP_BYREF && - (varDsc->lvExactSize > getSIMDVectorRegisterByteLength() || varDsc->lvExactSize < 8 || varDsc->lvStructGcCount != 0)) - { - return; - } - - unsigned sizeBytes = 0; - var_types simdBaseType = getBaseTypeAndSizeOfSIMDType(typeHnd, &sizeBytes); - if (simdBaseType != TYP_UNKNOWN) - { - assert(simdBaseType != TYP_UNKNOWN); - varDsc->lvSIMDType = true; - varDsc->lvBaseType = simdBaseType; - - // Set the size of the simd struct. - // Note that the size of SIMD vector will dynamically depend on whether - // the underlying machine supports SSE2 (16 bytes) or AVX (32 bytes). - varDsc->lvExactSize = sizeBytes; - -#ifdef _TARGET_AMD64_ - // Amd64: also indicate that we use floating point registers - compFloatingPointUsed = true; -#endif - } -} - // Pops and returns GenTree node from importer's type stack. // Normalizes TYP_STRUCT value in case of GT_CALL, GT_RET_EXPR and arg nodes. // // Arguments: +// type - the type of value that the caller expects to be popped off the stack. // expectAddr - if true indicates we are expecting type stack entry to be a TYP_BYREF. // -GenTreePtr Compiler::impSIMDPopStack(bool expectAddr) +// Notes: +// If the popped value is a struct, and the expected type is a simd type, it will be set +// to that type, otherwise it will assert if the type being popped is not the expected type. + +GenTreePtr Compiler::impSIMDPopStack(var_types type, bool expectAddr) { StackEntry se = impPopStack(); typeInfo ti = se.seTypeInfo; @@ -751,7 +652,7 @@ GenTreePtr Compiler::impSIMDPopStack(bool expectAddr) } else { - tree = gtNewOperNode(GT_IND, TYP_STRUCT, tree); + tree = gtNewOperNode(GT_IND, type, tree); } } @@ -775,13 +676,25 @@ GenTreePtr Compiler::impSIMDPopStack(bool expectAddr) } // normalize TYP_STRUCT value - if ((tree->TypeGet() == TYP_STRUCT) && (tree->OperGet() == GT_RET_EXPR || tree->OperGet() == GT_CALL || isParam)) + if (varTypeIsStruct(tree) && ((tree->OperGet() == GT_RET_EXPR) || (tree->OperGet() == GT_CALL) || isParam)) { assert(ti.IsType(TI_STRUCT)); CORINFO_CLASS_HANDLE structType = ti.GetClassHandleForValueClass(); tree = impNormStructVal(tree, structType, (unsigned)CHECK_SPILL_ALL); } + // Now set the type of the tree to the specialized SIMD struct type, if applicable. + if (genActualType(tree->gtType) != genActualType(type)) + { + assert(tree->gtType == TYP_STRUCT); + tree->gtType = type; + } + else if (tree->gtType == TYP_BYREF) + { + assert(tree->IsLocal() || + (tree->gtOper == GT_ADDR) && varTypeIsSIMD(tree->gtGetOp1())); + } + return tree; } @@ -795,15 +708,15 @@ GenTreePtr Compiler::impSIMDPopStack(bool expectAddr) // Return Value: // Returns a GT_SIMD node with the SIMDIntrinsicGetItem intrinsic id. // -GenTreeSIMD* Compiler::impSIMDGetFixed(var_types baseType, +GenTreeSIMD* Compiler::impSIMDGetFixed(var_types simdType, + var_types baseType, unsigned simdSize, int index) { assert(simdSize >= ((index + 1) * genTypeSize(baseType))); // op1 is a SIMD source. 
- GenTree* op1 = impSIMDPopStack(true); - assert(op1->TypeGet() == TYP_STRUCT); + GenTree* op1 = impSIMDPopStack(simdType, true); GenTree* op2 = gtNewIconNode(index); GenTreeSIMD* simdTree = gtNewSIMDNode(baseType, op1, op2, SIMDIntrinsicGetItem, baseType, simdSize); @@ -825,11 +738,14 @@ GenTreeSIMD* Compiler::impSIMDGetFixed(var_types baseType, // SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, unsigned size, - GenTree** op1, - GenTree** op2) + GenTree** pOp1, + GenTree** pOp2) { + var_types simdType = (*pOp1)->TypeGet(); + assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); + // There is no direct SSE2 support for comparing TYP_LONG vectors. - // These have to be implemented interms of TYP_INT vector comparison operations. + // These have to be implemented in terms of TYP_INT vector comparison operations. // // Equality(v1, v2): // tmp = (v1 == v2) i.e. compare for equality as if v1 and v2 are vector<int> @@ -837,17 +753,17 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of respective long elements. // Compare vector<long> as if they were vector<int> and assign the result to a temp - GenTree* compResult = gtNewSIMDNode(TYP_STRUCT, *op1, *op2, SIMDIntrinsicEqual, TYP_INT, size); + GenTree* compResult = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicEqual, TYP_INT, size); unsigned lclNum = lvaGrabTemp(true DEBUGARG("SIMD Long ==")); lvaSetStruct(lclNum, typeHnd, false); - GenTree* tmp = gtNewLclvNode(lclNum, TYP_STRUCT); + GenTree* tmp = gtNewLclvNode(lclNum, simdType); GenTree* asg = gtNewTempAssign(lclNum, compResult); // op1 = GT_COMMA(tmp=compResult, tmp) // op2 = Shuffle(tmp, 0xB1) // IntrinsicId = BitwiseAnd - *op1 = gtNewOperNode(GT_COMMA, TYP_STRUCT, asg, tmp); - *op2 = gtNewSIMDNode(TYP_STRUCT, gtNewLclvNode(lclNum, TYP_STRUCT), gtNewIconNode(SHUFFLE_ZWYX, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_INT, size); + *pOp1 = gtNewOperNode(GT_COMMA, simdType, asg, tmp); + *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWYX, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_INT, size); return SIMDIntrinsicBitwiseAnd; } @@ -857,17 +773,20 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, // Argumens: // typeHnd - type handle of SIMD vector // size - SIMD vector size -// op1 - in-out parameter; first operand -// op2 - in-out parameter; second operand +// pOp1 - in-out parameter; first operand +// pOp2 - in-out parameter; second operand // // Return Value: -// Modifies in-out params op1, op2 and returns intrinsic ID to be applied to modified operands +// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands // SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd, unsigned size, - GenTree** op1, - GenTree** op2) + GenTree** pOp1, + GenTree** pOp2) { + var_types simdType = (*pOp1)->TypeGet(); + assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); + // GreaterThan(v1, v2) where v1 and v2 are vector long. // Let us consider the case of single long element comparison. // say L1 = (x1, y1) and L2 = (x2, y2) where x1, y1, x2, and y2 are 32-bit integers that comprise the longs L1 and L2. 
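The comment above describes how a per-lane signed 64-bit GreaterThan is emulated when SSE2 offers only 32-bit vector comparisons: compare the high halves signed, and fall back to an unsigned comparison of the low halves when the high halves are equal. A scalar C++ model of that decomposition is shown below; it is only an illustration of the arithmetic the vector sequence mirrors (the real code also obtains the unsigned 32-bit compare via impSIMDRelOp with TYP_UINT, using the sign-bias trick).

    #include <cassert>
    #include <cstdint>

    // Scalar model: signed 64-bit '>' rebuilt from 32-bit comparisons.
    bool LongGreaterThan(int64_t v1, int64_t v2)
    {
        int32_t  hi1 = (int32_t)(v1 >> 32), hi2 = (int32_t)(v2 >> 32); // signed high halves
        uint32_t lo1 = (uint32_t)v1,        lo2 = (uint32_t)v2;        // unsigned low halves

        bool hiGreaterSigned   = hi1 > hi2;    // signed 32-bit vector compare
        bool hiEqual           = hi1 == hi2;   // 32-bit vector equality
        bool loGreaterUnsigned = lo1 > lo2;    // unsigned 32-bit compare

        return hiGreaterSigned || (hiEqual && loGreaterUnsigned);
    }

    int main()
    {
        assert(LongGreaterThan(1, -1) && !LongGreaterThan(-1, 1));   // decided by the high halves
        assert(LongGreaterThan(0x100000000LL, 0xFFFFFFFFLL));        // high half wins over a large low half
        assert(LongGreaterThan(5, 3) && !LongGreaterThan(3, 5));     // equal highs, unsigned low compare
        return 0;
    }

The vector version computes all three 32-bit comparisons across the whole register at once and then combines the per-lane results with shuffles and bitwise AND/OR, as the hunk that follows shows.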
@@ -892,50 +811,50 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeH GenTree* dupDupOp1 = nullptr; GenTree* dupDupOp2 = nullptr; - if (((*op1)->gtFlags & GTF_SIDE_EFFECT) != 0) + if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) { - dupOp1 = fgInsertCommaFormTemp(op1, typeHnd); - dupDupOp1 = gtNewLclvNode(dupOp1->AsLclVarCommon()->GetLclNum(), TYP_STRUCT); + dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd); + dupDupOp1 = gtNewLclvNode(dupOp1->AsLclVarCommon()->GetLclNum(), simdType); } else { - dupOp1 = gtCloneExpr(*op1); - dupDupOp1 = gtCloneExpr(*op1); + dupOp1 = gtCloneExpr(*pOp1); + dupDupOp1 = gtCloneExpr(*pOp1); } - if (((*op2)->gtFlags & GTF_SIDE_EFFECT) != 0) + if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) { - dupOp2 = fgInsertCommaFormTemp(op2, typeHnd); - dupDupOp2 = gtNewLclvNode(dupOp2->AsLclVarCommon()->GetLclNum(), TYP_STRUCT); + dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); + dupDupOp2 = gtNewLclvNode(dupOp2->AsLclVarCommon()->GetLclNum(), simdType); } else { - dupOp2 = gtCloneExpr(*op2); - dupDupOp2 = gtCloneExpr(*op2); + dupOp2 = gtCloneExpr(*pOp2); + dupDupOp2 = gtCloneExpr(*pOp2); } assert(dupDupOp1 != nullptr && dupDupOp2 != nullptr); assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*op1 != nullptr && *op2 != nullptr); + assert(*pOp1 != nullptr && *pOp2 != nullptr); // v1GreaterThanv2Signed - signed 32-bit comparison - GenTree* v1GreaterThanv2Signed = gtNewSIMDNode(TYP_STRUCT, *op1, *op2, SIMDIntrinsicGreaterThan, TYP_INT, size); + GenTree* v1GreaterThanv2Signed = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicGreaterThan, TYP_INT, size); // v1Equalsv2 - 32-bit equality - GenTree* v1Equalsv2 = gtNewSIMDNode(TYP_STRUCT, dupOp1, dupOp2, SIMDIntrinsicEqual, TYP_INT, size); + GenTree* v1Equalsv2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicEqual, TYP_INT, size); // v1GreaterThanv2Unsigned - unsigned 32-bit comparison var_types tempBaseType = TYP_UINT; SIMDIntrinsicID sid = impSIMDRelOp(SIMDIntrinsicGreaterThan, typeHnd, size, &tempBaseType, &dupDupOp1, &dupDupOp2); - GenTree* v1GreaterThanv2Unsigned = gtNewSIMDNode(TYP_STRUCT, dupDupOp1, dupDupOp2, sid, tempBaseType, size); + GenTree* v1GreaterThanv2Unsigned = gtNewSIMDNode(simdType, dupDupOp1, dupDupOp2, sid, tempBaseType, size); - GenTree* z = gtNewSIMDNode(TYP_STRUCT, v1GreaterThanv2Signed, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); - GenTree* t1 = gtNewSIMDNode(TYP_STRUCT, v1GreaterThanv2Unsigned, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); - GenTree* u1 = gtNewSIMDNode(TYP_STRUCT, v1Equalsv2, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); - GenTree* w = gtNewSIMDNode(TYP_STRUCT, u1, t1, SIMDIntrinsicBitwiseAnd, TYP_INT, size); + GenTree* z = gtNewSIMDNode(simdType, v1GreaterThanv2Signed, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); + GenTree* t1 = gtNewSIMDNode(simdType, v1GreaterThanv2Unsigned, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); + GenTree* u1 = gtNewSIMDNode(simdType, v1Equalsv2, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); + GenTree* w = gtNewSIMDNode(simdType, u1, t1, SIMDIntrinsicBitwiseAnd, TYP_INT, size); - *op1 = z; - *op2 = w; + *pOp1 = z; + *pOp2 = w; return SIMDIntrinsicBitwiseOr; } @@ -945,49 +864,52 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeH // Argumens: // typeHnd - type handle of SIMD 
vector // size - SIMD vector size -// op1 - in-out parameter; first operand -// op2 - in-out parameter; second operand +// pOp1 - in-out parameter; first operand +// pOp2 - in-out parameter; second operand // // Return Value: -// Modifies in-out params op1, op2 and returns intrinsic ID to be applied to modified operands +// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands // SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd, unsigned size, - GenTree** op1, - GenTree** op2) + GenTree** pOp1, + GenTree** pOp2) { + var_types simdType = (*pOp1)->TypeGet(); + assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); + // expand this to (a == b) | (a > b) GenTree* dupOp1 = nullptr; GenTree* dupOp2 = nullptr; - if (((*op1)->gtFlags & GTF_SIDE_EFFECT) != 0) + if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) { - dupOp1 = fgInsertCommaFormTemp(op1, typeHnd); + dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd); } else { - dupOp1 = gtCloneExpr(*op1); + dupOp1 = gtCloneExpr(*pOp1); } - if (((*op2)->gtFlags & GTF_SIDE_EFFECT) != 0) + if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) { - dupOp2 = fgInsertCommaFormTemp(op2, typeHnd); + dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); } else { - dupOp2 = gtCloneExpr(*op2); + dupOp2 = gtCloneExpr(*pOp2); } assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*op1 != nullptr && *op2 != nullptr); + assert(*pOp1 != nullptr && *pOp2 != nullptr); // (a==b) - SIMDIntrinsicID id = impSIMDLongRelOpEqual(typeHnd, size, op1, op2); - *op1 = gtNewSIMDNode(TYP_STRUCT, *op1, *op2, id, TYP_LONG, size); + SIMDIntrinsicID id = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2); + *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, id, TYP_LONG, size); // (a > b) id = impSIMDLongRelOpGreaterThan(typeHnd, size, &dupOp1, &dupOp2); - *op2 = gtNewSIMDNode(TYP_STRUCT, dupOp1, dupOp2, id, TYP_LONG, size); + *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, id, TYP_LONG, size); return SIMDIntrinsicBitwiseOr; } @@ -999,56 +921,59 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDL // typeHnd - type handle of SIMD vector // size - SIMD vector size // baseType - base type of SIMD vector -// op1 - in-out parameter; first operand -// op2 - in-out parameter; second operand +// pOp1 - in-out parameter; first operand +// pOp2 - in-out parameter; second operand // // Return Value: -// Modifies in-out params op1, op2 and returns intrinsic ID to be applied to modified operands +// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands // SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd, unsigned size, var_types baseType, - GenTree** op1, - GenTree** op2) + GenTree** pOp1, + GenTree** pOp2) { + var_types simdType = (*pOp1)->TypeGet(); + assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); + // This routine should be used only for integer base type vectors assert(varTypeIsIntegral(baseType)); if ((getSIMDInstructionSet() == InstructionSet_SSE2) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE)) { - return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, op1, op2); + return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2); } // expand this to (a == b) | (a > b) GenTree* dupOp1 = nullptr; GenTree* dupOp2 = nullptr; - if (((*op1)->gtFlags & GTF_SIDE_EFFECT) != 0) + if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) { - dupOp1 = fgInsertCommaFormTemp(op1, typeHnd); + dupOp1 = 
fgInsertCommaFormTemp(pOp1, typeHnd); } else { - dupOp1 = gtCloneExpr(*op1); + dupOp1 = gtCloneExpr(*pOp1); } - if (((*op2)->gtFlags & GTF_SIDE_EFFECT) != 0) + if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) { - dupOp2 = fgInsertCommaFormTemp(op2, typeHnd); + dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); } else { - dupOp2 = gtCloneExpr(*op2); + dupOp2 = gtCloneExpr(*pOp2); } assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*op1 != nullptr && *op2 != nullptr); + assert(*pOp1 != nullptr && *pOp2 != nullptr); // (a==b) - *op1 = gtNewSIMDNode(TYP_STRUCT, *op1, *op2, SIMDIntrinsicEqual, baseType, size); + *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicEqual, baseType, size); // (a > b) - *op2 = gtNewSIMDNode(TYP_STRUCT, dupOp1, dupOp2, SIMDIntrinsicGreaterThan, baseType, size); + *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicGreaterThan, baseType, size); return SIMDIntrinsicBitwiseOr; } @@ -1062,19 +987,22 @@ SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual(CORINFO_CLASS_H // typeHnd - type handle of SIMD vector // size - SIMD vector size // inOutBaseType - base type of SIMD vector -// op1 - in-out parameter; first operand -// op2 - in-out parameter; second operand +// pOp1 - in-out parameter; first operand +// pOp2 - in-out parameter; second operand // // Return Value: -// Modifies in-out params op1, op2, inOutBaseType and returns intrinsic ID to be applied to modified operands +// Modifies in-out params pOp1, pOp2, inOutBaseType and returns intrinsic ID to be applied to modified operands // SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, CORINFO_CLASS_HANDLE typeHnd, unsigned size, var_types* inOutBaseType, - GenTree** op1, - GenTree** op2) + GenTree** pOp1, + GenTree** pOp2) { + var_types simdType = (*pOp1)->TypeGet(); + assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); + assert(isRelOpSIMDIntrinsic(relOpIntrinsicId)); #ifdef _TARGET_AMD64_ @@ -1088,9 +1016,9 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, if (relOpIntrinsicId == SIMDIntrinsicGreaterThan || relOpIntrinsicId == SIMDIntrinsicGreaterThanOrEqual) { - GenTree* tmp = *op1; - *op1 = *op2; - *op2 = tmp; + GenTree* tmp = *pOp1; + *pOp1 = *pOp2; + *pOp2 = tmp; intrinsicID = (relOpIntrinsicId == SIMDIntrinsicGreaterThan) ? SIMDIntrinsicLessThan : SIMDIntrinsicLessThanOrEqual; @@ -1103,9 +1031,9 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, if (intrinsicID == SIMDIntrinsicLessThan || intrinsicID == SIMDIntrinsicLessThanOrEqual) { - GenTree* tmp = *op1; - *op1 = *op2; - *op2 = tmp; + GenTree* tmp = *pOp1; + *pOp1 = *pOp2; + *pOp2 = tmp; intrinsicID = (relOpIntrinsicId == SIMDIntrinsicLessThan) ? SIMDIntrinsicGreaterThan : SIMDIntrinsicGreaterThanOrEqual; @@ -1117,15 +1045,15 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, // These have to be implemented interms of TYP_INT vector comparison operations. 
if (intrinsicID == SIMDIntrinsicEqual) { - intrinsicID = impSIMDLongRelOpEqual(typeHnd, size, op1, op2); + intrinsicID = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2); } else if (intrinsicID == SIMDIntrinsicGreaterThan) { - intrinsicID = impSIMDLongRelOpGreaterThan(typeHnd, size, op1, op2); + intrinsicID = impSIMDLongRelOpGreaterThan(typeHnd, size, pOp1, pOp2); } else if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual) { - intrinsicID = impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, op1, op2); + intrinsicID = impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2); } else { @@ -1137,7 +1065,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, { if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual) { - intrinsicID = impSIMDIntegralRelOpGreaterThanOrEqual(typeHnd, size, baseType, op1, op2); + intrinsicID = impSIMDIntegralRelOpGreaterThanOrEqual(typeHnd, size, baseType, pOp1, pOp2); } } else // unsigned @@ -1185,7 +1113,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, var_types tempBaseType = (baseType == TYP_ULONG) ? TYP_LONG : TYP_INT; GenTree* initVal = gtNewIconNode(constVal); initVal->gtType = tempBaseType; - GenTree* constVector = gtNewSIMDNode(TYP_STRUCT, initVal, nullptr, SIMDIntrinsicInit, tempBaseType, size); + GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, tempBaseType, size); // Assign constVector to a temp, since we intend to use it more than once // TODO-CQ: We have quite a few such constant vectors constructed during @@ -1195,11 +1123,11 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, // op1 = op1 - constVector // op2 = op2 - constVector - *op1 = gtNewSIMDNode(TYP_STRUCT, *op1, constVector, SIMDIntrinsicSub, baseType, size); - *op2 = gtNewSIMDNode(TYP_STRUCT, *op2, tmp, SIMDIntrinsicSub, baseType, size); + *pOp1 = gtNewSIMDNode(simdType, *pOp1, constVector, SIMDIntrinsicSub, baseType, size); + *pOp2 = gtNewSIMDNode(simdType, *pOp2, tmp, SIMDIntrinsicSub, baseType, size); } - return impSIMDRelOp(intrinsicID, typeHnd, size, inOutBaseType, op1, op2); + return impSIMDRelOp(intrinsicID, typeHnd, size, inOutBaseType, pOp1, pOp2); } } @@ -1230,9 +1158,11 @@ GenTreePtr Compiler::impSIMDSelect(CORINFO_CLASS_HANDLE typeHnd, GenTree* op2, GenTree* op3) { - assert(op1->TypeGet() == TYP_STRUCT); - assert(op2->TypeGet() == TYP_STRUCT); - assert(op3->TypeGet() == TYP_STRUCT); + assert(varTypeIsSIMD(op1)); + var_types simdType = op1->TypeGet(); + assert(op2->TypeGet() == simdType); + assert(op3->TypeGet() == simdType); + // Select(BitVector vc, va, vb) = (va & vc) | (vb & !vc) // Select(op1, op2, op3) = (op2 & op1) | (op3 & !op1) @@ -1249,11 +1179,11 @@ GenTreePtr Compiler::impSIMDSelect(CORINFO_CLASS_HANDLE typeHnd, asg = gtNewTempAssign(lclNum, op1); } - GenTree* andExpr = gtNewSIMDNode(TYP_STRUCT, op2, tmp, SIMDIntrinsicBitwiseAnd, baseType, size); + GenTree* andExpr = gtNewSIMDNode(simdType, op2, tmp, SIMDIntrinsicBitwiseAnd, baseType, size); GenTree* dupOp1 = gtCloneExpr(tmp); assert(dupOp1 != nullptr); - GenTree* andNotExpr = gtNewSIMDNode(TYP_STRUCT, dupOp1, op3, SIMDIntrinsicBitwiseAndNot, baseType, size); - GenTree* simdTree = gtNewSIMDNode(TYP_STRUCT, andExpr, andNotExpr, SIMDIntrinsicBitwiseOr, baseType, size); + GenTree* andNotExpr = gtNewSIMDNode(simdType, dupOp1, op3, SIMDIntrinsicBitwiseAndNot, baseType, size); + GenTree* simdTree = gtNewSIMDNode(simdType, andExpr, andNotExpr, SIMDIntrinsicBitwiseOr, baseType, size); // If asg not null, create a 
GT_COMMA tree. if (asg != nullptr) @@ -1285,8 +1215,9 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, GenTree* op2) { assert(intrinsicId == SIMDIntrinsicMin || intrinsicId == SIMDIntrinsicMax); - assert(op1->TypeGet() == TYP_STRUCT); - assert(op2->TypeGet() == TYP_STRUCT); + assert(varTypeIsSIMD(op1)); + var_types simdType = op1->TypeGet(); + assert(op2->TypeGet() == simdType); #ifdef _TARGET_AMD64_ // SSE2 has direct support for float/double/signed word/unsigned byte. @@ -1314,7 +1245,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE) { // SSE2 has direct support - simdTree = gtNewSIMDNode(TYP_STRUCT, op1, op2, intrinsicId, baseType, size); + simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, baseType, size); } else if (baseType == TYP_CHAR || baseType == TYP_BYTE) { @@ -1339,7 +1270,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, } GenTree* initVal = gtNewIconNode(constVal); - GenTree* constVector = gtNewSIMDNode(TYP_STRUCT, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size); + GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size); // Assign constVector to a temp, since we intend to use it more than once // TODO-CQ: We have quite a few such constant vectors constructed during @@ -1349,15 +1280,15 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, // op1 = op1 - constVector // op2 = op2 - constVector - op1 = gtNewSIMDNode(TYP_STRUCT, op1, constVector, operIntrinsic, baseType, size); - op2 = gtNewSIMDNode(TYP_STRUCT, op2, tmp, operIntrinsic, baseType, size); + op1 = gtNewSIMDNode(simdType, op1, constVector, operIntrinsic, baseType, size); + op2 = gtNewSIMDNode(simdType, op2, tmp, operIntrinsic, baseType, size); // compute min/max of op1 and op2 considering them as if minMaxOperBaseType - simdTree = gtNewSIMDNode(TYP_STRUCT, op1, op2, intrinsicId, minMaxOperBaseType, size); + simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, minMaxOperBaseType, size); // re-adjust the value by adding or subtracting constVector tmp = gtNewLclvNode(tmp->AsLclVarCommon()->GetLclNum(), tmp->TypeGet()); - simdTree = gtNewSIMDNode(TYP_STRUCT, simdTree, tmp, adjustIntrinsic, baseType, size); + simdTree = gtNewSIMDNode(simdType, simdTree, tmp, adjustIntrinsic, baseType, size); } else { @@ -1402,7 +1333,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, assert(dupOp1 != nullptr); assert(dupOp2 != nullptr); relOpIntrinsic = impSIMDRelOp(relOpIntrinsic, typeHnd, size, &relOpBaseType, &dupOp1, &dupOp2); - GenTree* compResult = gtNewSIMDNode(TYP_STRUCT, dupOp1, dupOp2, relOpIntrinsic, relOpBaseType, size); + GenTree* compResult = gtNewSIMDNode(simdType, dupOp1, dupOp2, relOpIntrinsic, relOpBaseType, size); unsigned compResultLclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max")); lvaSetStruct(compResultLclNum, typeHnd, false); GenTree* compResultAssign = gtNewTempAssign(compResultLclNum, compResult); @@ -1461,7 +1392,7 @@ GenTreePtr Compiler::getOp1ForConstructor(OPCODE opcode, } else { - op1 = impSIMDPopStack(); + op1 = impSIMDPopStack(TYP_BYREF); } assert(op1->TypeGet() == TYP_BYREF); return op1; @@ -1674,7 +1605,7 @@ GenTreePtr Compiler::createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simd // TODO-CQ: // In future, we should optimize this case so that if there is a nested field like s1.s2.x and s1.s2.x's address is used for // initializing the vector, then s1 can be promoted but s2 
can't. - if(obj->gtType == TYP_STRUCT && obj->OperIsLocal()) + if(varTypeIsSIMD(obj) && obj->OperIsLocal()) { setLclRelatedToSIMDIntrinsic(obj); } @@ -1727,7 +1658,10 @@ GenTreePtr Compiler::createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simd void Compiler::impMarkContiguousSIMDFieldAssignments(GenTreePtr stmt) { - + if (!featureSIMD || opts.MinOpts()) + { + return; + } GenTreePtr expr = stmt->gtStmt.gtStmtExpr; if (expr->OperGet() == GT_ASG && expr->TypeGet() == TYP_FLOAT) @@ -1774,7 +1708,7 @@ void Compiler::impMarkContiguousSIMDFieldAssignments(GenTreePtr stmt) if (objRef != nullptr && objRef->gtOper == GT_ADDR) { GenTreePtr obj = objRef->gtOp.gtOp1; - if (obj->gtType == TYP_STRUCT && obj->OperIsLocal()) + if (varTypeIsStruct(obj) && obj->OperIsLocal()) { setLclRelatedToSIMDIntrinsic(obj); } @@ -1845,8 +1779,26 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, } SIMDIntrinsicID simdIntrinsicID = intrinsicInfo->id; + var_types simdType; + if (baseType != TYP_UNKNOWN) + { + simdType = getSIMDTypeForSize(size); + } + else + { + assert(simdIntrinsicID == SIMDIntrinsicHWAccel); + simdType = TYP_UNKNOWN; + } bool instMethod = intrinsicInfo->isInstMethod; var_types callType = JITtype2varType(sig->retType); + if (callType == TYP_STRUCT) + { + // Note that here we are assuming that, if the call returns a struct, that it is the same size as the + // struct on which the method is declared. This is currently true for all methods on Vector types, + // but if this ever changes, we will need to determine the callType from the signature. + assert(info.compCompHnd->getClassSize(sig->retTypeClass) == genTypeSize(simdType)); + callType = simdType; + } GenTree* simdTree = nullptr; GenTree* op1 = nullptr; @@ -1871,7 +1823,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, baseType = genActualType(baseType); GenTree *initVal = gtNewZeroConNode(baseType); initVal->gtType = baseType; - simdTree = gtNewSIMDNode(TYP_STRUCT, initVal, nullptr, SIMDIntrinsicInit, baseType, size); + simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size); retVal = simdTree; } break; @@ -1900,7 +1852,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, baseType = genActualType(baseType); initVal->gtType = baseType; - simdTree = gtNewSIMDNode(TYP_STRUCT, initVal, nullptr, SIMDIntrinsicInit, baseType, size); + simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size); retVal = simdTree; } break; @@ -1909,11 +1861,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, { // Equivalent to (Vector<T>) new Vector<int>(0xffffffff); GenTree *initVal = gtNewIconNode(0xffffffff, TYP_INT); - simdTree = gtNewSIMDNode(TYP_STRUCT, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size); + simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size); if (baseType != TYP_INT) { // cast it to required baseType if different from TYP_INT - simdTree = gtNewSIMDNode(genActualType(callType), simdTree, nullptr, SIMDIntrinsicCast, baseType, size); + simdTree = gtNewSIMDNode(simdType, simdTree, nullptr, SIMDIntrinsicCast, baseType, size); } retVal = simdTree; } @@ -1932,7 +1884,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, bool initFromFirstArgIndir = false; if (simdIntrinsicID == SIMDIntrinsicInit) { - op2 = impSIMDPopStack(); + op2 = impSIMDPopStack(baseType); } else { @@ -1955,8 +1907,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, bool areArgsContiguous = true; for (unsigned i = 0; i < initCount; i++) { - GenTree* nextArg 
= impSIMDPopStack(); - assert(nextArg->TypeGet() == baseType); + GenTree* nextArg = impSIMDPopStack(baseType); if (areArgsContiguous) { GenTreePtr curArg = nextArg; @@ -1997,8 +1948,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, #if AVX_WITHOUT_AVX2 // NOTE: This #define, AVX_WITHOUT_AVX2, is never defined. This code is kept here // in case we decide to implement AVX support (32 byte vectors) with AVX only. - // On AVX (as opposed to AVX2), broadcast is supported only for float and double, - // and requires taking a mem address of the value. + // On AVX (as opposed to AVX2), broadcast is supported only for float and double, + // and requires taking a mem address of the value. // If not a constant, take the addr of op2. if (simdIntrinsicID == SIMDIntrinsicInit && canUseAVX()) { @@ -2064,8 +2015,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, op2 = gtNewOperNode(GT_MUL, TYP_INT, t1, t2); // Construct a vector of TYP_INT with the new initializer and cast it back to vector of baseType - simdTree = gtNewSIMDNode(TYP_STRUCT, op2, nullptr, simdIntrinsicID, TYP_INT, size); - simdTree = gtNewSIMDNode(TYP_STRUCT, simdTree, nullptr, SIMDIntrinsicCast, baseType, size); + simdTree = gtNewSIMDNode(simdType, op2, nullptr, simdIntrinsicID, TYP_INT, size); + simdTree = gtNewSIMDNode(simdType, simdTree, nullptr, SIMDIntrinsicCast, baseType, size); } else { @@ -2082,7 +2033,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, } else { - simdTree = gtNewSIMDNode(TYP_STRUCT, op2, nullptr, simdIntrinsicID, baseType, size); + simdTree = gtNewSIMDNode(simdType, op2, nullptr, simdIntrinsicID, baseType, size); } } @@ -2117,7 +2068,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, // top of the stack. Otherwise, it is null. if (argCount == 3) { - op3 = impSIMDPopStack(); + op3 = impSIMDPopStack(TYP_INT); if (op3->IsZero()) { op3 = nullptr; @@ -2131,8 +2082,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, op3 = nullptr; } - // Clone the array for use in the check(s). - op2 = impSIMDPopStack(); + // Clone the array for use in the bounds check. + op2 = impSIMDPopStack(TYP_REF); assert(op2->TypeGet() == TYP_REF); GenTree* arrayRefForArgChk = op2; GenTree* argRngChk = nullptr; @@ -2207,15 +2158,15 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX) { op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); - simdTree = gtNewSIMDNode(TYP_STRUCT, op2, op3, SIMDIntrinsicInitArray, baseType, size); + simdTree = gtNewSIMDNode(simdType, op2, op3, SIMDIntrinsicInitArray, baseType, size); copyBlkDst = op1; doCopyBlk = true; } else { assert(simdIntrinsicID == SIMDIntrinsicCopyToArray || simdIntrinsicID == SIMDIntrinsicCopyToArrayX); - op1 = impSIMDPopStack(instMethod); - assert(op1->TypeGet() == TYP_STRUCT); + op1 = impSIMDPopStack(simdType, instMethod); + assert(op1->TypeGet() == simdType); // copy vector (op1) to array (op2) starting at index (op3) simdTree = op1; @@ -2240,13 +2191,18 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, GenTree* op4 = nullptr; if (argCount == 4) { - op4 = impSIMDPopStack(); + op4 = impSIMDPopStack(TYP_FLOAT); assert(op4->TypeGet() == TYP_FLOAT); } - op3 = impSIMDPopStack(); + op3 = impSIMDPopStack(TYP_FLOAT); assert(op3->TypeGet() == TYP_FLOAT); - op2 = impSIMDPopStack(); - assert(op2->TypeGet() == TYP_STRUCT); + // The input vector will either be TYP_SIMD8 or TYP_SIMD12. 
+ var_types smallSIMDType = TYP_SIMD8; + if ((op4 == nullptr) && (simdType == TYP_SIMD16)) + { + smallSIMDType = TYP_SIMD12; + } + op2 = impSIMDPopStack(smallSIMDType); op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); // We are going to redefine the operands so that: @@ -2261,11 +2217,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, simdTree = op2; if (op3 != nullptr) { - simdTree = gtNewSIMDNode(TYP_STRUCT, simdTree, op3, SIMDIntrinsicSetZ, baseType, size); + simdTree = gtNewSIMDNode(simdType, simdTree, op3, SIMDIntrinsicSetZ, baseType, size); } if (op4 != nullptr) { - simdTree = gtNewSIMDNode(TYP_STRUCT, simdTree, op4, SIMDIntrinsicSetW, baseType, size); + simdTree = gtNewSIMDNode(simdType, simdTree, op4, SIMDIntrinsicSetW, baseType, size); } copyBlkDst = op1; @@ -2276,11 +2232,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicOpEquality: case SIMDIntrinsicInstEquals: { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(instMethod); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType, instMethod); - assert(op1->TypeGet() == TYP_STRUCT); - assert(op2->TypeGet() == TYP_STRUCT); + assert(op1->TypeGet() == simdType); + assert(op2->TypeGet() == simdType); simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size); retVal = simdTree; @@ -2291,11 +2247,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, { // op1 is the first operand // op2 is the second operand - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(instMethod); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType, instMethod); - assert(op1->TypeGet() == TYP_STRUCT); - assert(op2->TypeGet() == TYP_STRUCT); + assert(op1->TypeGet() == simdType); + assert(op2->TypeGet() == simdType); simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size); retVal = simdTree; @@ -2308,11 +2264,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicGreaterThan: case SIMDIntrinsicGreaterThanOrEqual: { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(instMethod); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType, instMethod); - assert(op1->TypeGet() == TYP_STRUCT); - assert(op2->TypeGet() == TYP_STRUCT); + assert(op1->TypeGet() == simdType); + assert(op2->TypeGet() == simdType); SIMDIntrinsicID intrinsicID = impSIMDRelOp(simdIntrinsicID, clsHnd, size, &baseType, &op1, &op2); simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, intrinsicID, baseType, size); @@ -2361,13 +2317,10 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, // op1 is the first operand; if instance method, op1 is "this" arg // op2 is the second operand - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(instMethod); - - assert(op1->TypeGet() == TYP_STRUCT); - assert(op2->TypeGet() == TYP_STRUCT); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType, instMethod); - simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, simdIntrinsicID, baseType, size); + simdTree = gtNewSIMDNode(simdType, op1, op2, simdIntrinsicID, baseType, size); retVal = simdTree; } break; @@ -2377,9 +2330,9 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, // op3 is a SIMD variable that is the second source // op2 is a SIMD variable that is the first source // op1 is a SIMD variable which is the bit mask. 
- op3 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); + op3 = impSIMDPopStack(simdType); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType); retVal = impSIMDSelect(clsHnd, baseType, size, op1, op2, op3); } @@ -2390,8 +2343,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, { // op1 is the first operand; if instance method, op1 is "this" arg // op2 is the second operand - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(instMethod); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType, instMethod); retVal = impSIMDMinMax(simdIntrinsicID, clsHnd, baseType, size, op1, op2); } @@ -2401,8 +2354,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, { // op1 is a SIMD variable that is "this" arg // op2 is an index of TYP_INT - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(instMethod); + op2 = impSIMDPopStack(TYP_INT); + op1 = impSIMDPopStack(simdType, instMethod); unsigned int vectorLength = getSIMDVectorLength(size, baseType); if (!op2->IsCnsIntOrI() || op2->AsIntCon()->gtIconVal >= vectorLength) { @@ -2425,7 +2378,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), simdChk, op2); } - assert(op1->TypeGet() == TYP_STRUCT); + assert(op1->TypeGet() == simdType); assert(op2->TypeGet() == TYP_INT); simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, simdIntrinsicID, baseType, size); @@ -2447,11 +2400,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, // op1 is a SIMD variable that is the first source and also "this" arg. // op2 is a SIMD variable which is the second source. - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(instMethod); - - assert(op1->TypeGet() == TYP_STRUCT); - assert(op2->TypeGet() == TYP_STRUCT); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType, instMethod); simdTree = gtNewSIMDNode(baseType, op1, op2, simdIntrinsicID, baseType, size); retVal = simdTree; @@ -2471,8 +2421,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, } #endif // _TARGET_AMD64_ && DEBUG - op1 = impSIMDPopStack(); - assert(op1->TypeGet() == TYP_STRUCT); + op1 = impSIMDPopStack(simdType); retVal = gtNewSIMDNode(genActualType(callType), op1, nullptr, simdIntrinsicID, baseType, size); } @@ -2480,8 +2429,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicAbs: { - op1 = impSIMDPopStack(); - assert(op1->TypeGet() == TYP_STRUCT); + op1 = impSIMDPopStack(simdType); #ifdef _TARGET_AMD64_ if (varTypeIsFloating(baseType)) @@ -2506,8 +2454,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, assert(bitMask != nullptr); bitMask->gtType = baseType; - GenTree* bitMaskVector = gtNewSIMDNode(TYP_STRUCT, bitMask, SIMDIntrinsicInit, baseType, size); - retVal = gtNewSIMDNode(TYP_STRUCT, op1, bitMaskVector, SIMDIntrinsicBitwiseAnd, baseType, size); + GenTree* bitMaskVector = gtNewSIMDNode(simdType, bitMask, SIMDIntrinsicInit, baseType, size); + retVal = gtNewSIMDNode(simdType, op1, bitMaskVector, SIMDIntrinsicBitwiseAnd, baseType, size); } else if (baseType == TYP_CHAR || baseType == TYP_UBYTE || baseType == TYP_UINT || baseType == TYP_ULONG) { @@ -2530,19 +2478,19 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, break; case SIMDIntrinsicGetW: - retVal = impSIMDGetFixed(baseType, size, 3); + retVal = impSIMDGetFixed(simdType, baseType, size, 3); break; case SIMDIntrinsicGetZ: - retVal = impSIMDGetFixed(baseType, size, 2); + retVal = impSIMDGetFixed(simdType, baseType, size, 2); break; case SIMDIntrinsicGetY: - retVal = 
impSIMDGetFixed(baseType, size, 1); + retVal = impSIMDGetFixed(simdType, baseType, size, 1); break; case SIMDIntrinsicGetX: - retVal = impSIMDGetFixed(baseType, size, 0); + retVal = impSIMDGetFixed(simdType, baseType, size, 0); break; case SIMDIntrinsicSetW: @@ -2564,15 +2512,12 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, return nullptr; } - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(instMethod); - - assert(op1->TypeGet() == TYP_STRUCT); - assert(op2->TypeGet() == baseType); + op2 = impSIMDPopStack(baseType); + op1 = impSIMDPopStack(simdType, instMethod); GenTree* src = gtCloneExpr(op1); assert(src != nullptr); - simdTree = gtNewSIMDNode(TYP_STRUCT, src, op2, simdIntrinsicID, baseType, size); + simdTree = gtNewSIMDNode(simdType, src, op2, simdIntrinsicID, baseType, size); copyBlkDst = gtNewOperNode(GT_ADDR, TYP_BYREF, op1); doCopyBlk = true; @@ -2582,10 +2527,9 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, // Unary operators that take and return a Vector. case SIMDIntrinsicCast: { - op1 = impSIMDPopStack(instMethod); - assert(op1->TypeGet() == TYP_STRUCT); + op1 = impSIMDPopStack(simdType, instMethod); - simdTree = gtNewSIMDNode(genActualType(callType), op1, nullptr, simdIntrinsicID, baseType, size); + simdTree = gtNewSIMDNode(simdType, op1, nullptr, simdIntrinsicID, baseType, size); retVal = simdTree; } break; diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp index a6ca3ee6bc..5faf61f61b 100644 --- a/src/jit/simdcodegenxarch.cpp +++ b/src/jit/simdcodegenxarch.cpp @@ -60,33 +60,33 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, switch(intrinsicId) { case SIMDIntrinsicInit: - if (compiler->canUseAVX()) - { - // AVX supports broadcast instructions to populate YMM reg with a single float/double value from memory. - // AVX2 supports broadcast instructions to populate YMM reg with a single value from memory or mm reg. - // If we decide to use AVX2 only, we can remove this assert. - if ((compiler->opts.eeFlags & CORJIT_FLG_USE_AVX2) == 0) - { - assert(baseType == TYP_FLOAT || baseType == TYP_DOUBLE); - } - switch (baseType) - { - case TYP_FLOAT: result = INS_vbroadcastss; break; - case TYP_DOUBLE: result = INS_vbroadcastsd; break; - case TYP_ULONG: __fallthrough; - case TYP_LONG: result = INS_vpbroadcastq; break; - case TYP_UINT: __fallthrough; - case TYP_INT: result = INS_vpbroadcastd; break; - case TYP_CHAR: __fallthrough; - case TYP_SHORT: result = INS_vpbroadcastw; break; - case TYP_UBYTE: __fallthrough; - case TYP_BYTE: result = INS_vpbroadcastb; break; - default: unreached(); - } - break; - } + if (compiler->canUseAVX()) + { + // AVX supports broadcast instructions to populate YMM reg with a single float/double value from memory. + // AVX2 supports broadcast instructions to populate YMM reg with a single value from memory or mm reg. + // If we decide to use AVX2 only, we can remove this assert. 
+ if ((compiler->opts.eeFlags & CORJIT_FLG_USE_AVX2) == 0) + { + assert(baseType == TYP_FLOAT || baseType == TYP_DOUBLE); + } + switch (baseType) + { + case TYP_FLOAT: result = INS_vbroadcastss; break; + case TYP_DOUBLE: result = INS_vbroadcastsd; break; + case TYP_ULONG: __fallthrough; + case TYP_LONG: result = INS_vpbroadcastq; break; + case TYP_UINT: __fallthrough; + case TYP_INT: result = INS_vpbroadcastd; break; + case TYP_CHAR: __fallthrough; + case TYP_SHORT: result = INS_vpbroadcastw; break; + case TYP_UBYTE: __fallthrough; + case TYP_BYTE: result = INS_vpbroadcastb; break; + default: unreached(); + } + break; + } // For SSE, SIMDIntrinsicInit uses the same instruction as the SIMDIntrinsicShuffleSSE2 intrinsic. - __fallthrough; + __fallthrough; case SIMDIntrinsicShuffleSSE2: if (baseType == TYP_FLOAT) { @@ -286,7 +286,7 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, { result = INS_pcmpeqb; } - else if (compiler->canUseAVX() && (baseType == TYP_ULONG || baseType == TYP_LONG)) + else if (compiler->canUseAVX() && (baseType == TYP_ULONG || baseType == TYP_LONG)) { result = INS_pcmpeqq; } @@ -345,7 +345,7 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, { result = INS_pcmpgtb; } - else if (compiler->canUseAVX() && (baseType == TYP_LONG)) + else if (compiler->canUseAVX() && (baseType == TYP_LONG)) { result = INS_pcmpgtq; } @@ -453,7 +453,7 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, // type the type of value to be moved // targetReg the target reg // srcReg the src reg -// zeroInit true if the upper bits of targetReg should be zero'd +// zeroInit true if the upper bits of targetReg should be zero'd // // Return Value: // None @@ -470,13 +470,13 @@ CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg if (compiler->getSIMDInstructionSet() == InstructionSet_AVX) { if (zeroInit) - { + { // insertps is a 128-bit only instruction, and clears the upper 128 bits, which is what we want. // The insertpsImm selects which fields are copied and zero'd of the lower 128 bits, so we choose // to zero all but the lower bits. unsigned int insertpsImm = (INSERTPS_TARGET_SELECT(0)|INSERTPS_ZERO(1)|INSERTPS_ZERO(2)|INSERTPS_ZERO(3)); - inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, insertpsImm); - } + inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, insertpsImm); + } else if (srcReg != targetReg) { instruction ins = ins_Store(type); @@ -500,20 +500,20 @@ CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg if (zeroInit) { if (srcReg == targetReg) - { - // There is no guarantee that upper bits of op1Reg are zero. - // We achieve this by using left logical shift 12-bytes and right logical shift 12 bytes. - instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, type); - getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); - ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, type); - getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); - } + { + // There is no guarantee that upper bits of op1Reg are zero. + // We achieve this by using left logical shift 12-bytes and right logical shift 12 bytes. 
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, type); + getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); + ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, type); + getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); + } else - { - instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicBitwiseXor, type); + { + instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicBitwiseXor, type); inst_RV_RV(ins, targetReg, targetReg, targetType, emitTypeSize(targetType)); - inst_RV_RV(ins_Store(type), targetReg, srcReg); - } + inst_RV_RV(ins_Store(type), targetReg, srcReg); + } } else if (srcReg != targetReg) { @@ -568,7 +568,7 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) else { assert(iset == InstructionSet_AVX); - ins = getOpForSIMDIntrinsic(SIMDIntrinsicInit, baseType); + ins = getOpForSIMDIntrinsic(SIMDIntrinsicInit, baseType); if (op1->IsCnsFltOrDbl()) { getEmitter()->emitInsBinary(ins, emitTypeSize(targetType), simdNode, op1); @@ -586,20 +586,20 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) #endif // FEATURE_AVX_SUPPORT } else if (iset == InstructionSet_AVX && ((size == 32) || (size == 16))) - { - regNumber srcReg = genConsumeReg(op1); - if (baseType == TYP_INT || baseType == TYP_UINT || - baseType == TYP_LONG || baseType == TYP_ULONG) - { - ins = ins_CopyIntToFloat(baseType, TYP_FLOAT); - assert(ins != INS_invalid); - inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType)); - srcReg = targetReg; - } - - ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); - getEmitter()->emitIns_R_R(ins, emitActualTypeSize(targetType), targetReg, srcReg); - } + { + regNumber srcReg = genConsumeReg(op1); + if (baseType == TYP_INT || baseType == TYP_UINT || + baseType == TYP_LONG || baseType == TYP_ULONG) + { + ins = ins_CopyIntToFloat(baseType, TYP_FLOAT); + assert(ins != INS_invalid); + inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType)); + srcReg = targetReg; + } + + ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); + getEmitter()->emitIns_R_R(ins, emitActualTypeSize(targetType), targetReg, srcReg); + } else { // If we reach here, op1 is not contained and we are using SSE or it is a SubRegisterSIMDType. @@ -622,7 +622,7 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) // from memory would zero-out upper bits. In these cases we can // avoid explicitly zero'ing out targetReg. 
bool zeroInitRequired = !(op1->IsCnsFltOrDbl() || op1->isMemoryOp()); - genSIMDScalarMove(TYP_FLOAT, targetReg, op1Reg, zeroInitRequired); + genSIMDScalarMove(TYP_FLOAT, targetReg, op1Reg, zeroInitRequired); if (size == 8) { @@ -657,7 +657,7 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) } ins = getOpForSIMDIntrinsic(SIMDIntrinsicShuffleSSE2, baseType); - getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, targetReg, shuffleControl); + getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, targetReg, shuffleControl); } genProduceReg(simdNode); @@ -1043,7 +1043,7 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) if (varTypeIsFloating(baseType)) { - getEmitter()->emitIns_R_R_I(ins, emitTypeSize(baseType), targetReg, otherReg, ival); + getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, otherReg, ival); } else { @@ -1089,6 +1089,9 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask); regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask); var_types simdType = op1->TypeGet(); + // TODO-1stClassStructs: Temporary to minimize asmDiffs + if (simdType == TYP_DOUBLE) + simdType = TYP_SIMD8; // Here we should consider TYP_SIMD12 operands as if they were TYP_SIMD16 // since both the operands will be in XMM registers. @@ -1141,7 +1144,7 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType)); } // Next, if we have more than 8 bytes, and the two 8-byte halves to get a 8-byte result. - if (simdType != TYP_DOUBLE) + if (simdType != TYP_SIMD8) { // tmpReg2 = Shuffle(tmpReg1, (1,0,3,2)) // Note: vpshufd is a 128-bit only instruction. Therefore, explicitly pass EA_16BYTE @@ -1155,7 +1158,7 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType)); } // At this point, we have either reduced the result to 8 bytes: tmpReg1[0] and tmpReg1[1], - // OR we have a Vector2 (TYPSIMD8 aka TYP_DOUBLE) in tmpReg1, which has only those two fields. + // OR we have a Vector2 (TYP_SIMD8) in tmpReg1, which has only those two fields. // tmpReg2 = Shuffle(tmpReg1, (0,0,0,1)) // tmpReg2[0] = compResult[1] & compResult[3] @@ -1215,6 +1218,9 @@ CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) GenTree* op2 = simdNode->gtGetOp2(); var_types baseType = simdNode->gtSIMDBaseType; var_types simdType = op1->TypeGet(); + // TODO-1stClassStructs: Temporary to minimize asmDiffs + if (simdType == TYP_DOUBLE) + simdType = TYP_SIMD8; var_types simdEvalType = (simdType == TYP_SIMD12) ? TYP_SIMD16 : simdType; regNumber targetReg = simdNode->gtRegNum; assert(targetReg != REG_NA); @@ -1386,7 +1392,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); var_types simdType = op1->TypeGet(); - assert(varTypeIsSIMD(simdType) || simdType == TYP_DOUBLE); + assert(varTypeIsSIMD(simdType)); // op1 of TYP_SIMD12 should be considered as TYP_SIMD16, // since it is in XMM register. 
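The genSIMDIntrinsicRelOp path above folds a per-lane comparison result down to a single boolean by repeatedly shuffling the XMM value and ANDing it with itself until one lane holds the combined mask. The scalar sketch below models that pairwise reduction over four 32-bit lanes (each lane is 0 or 0xFFFFFFFF after the compare); the lane pairing and the final all-ones test are simplified assumptions for illustration, not the exact emitted sequence.

    #include <cassert>
    #include <cstdint>

    // Fold a 4-lane compare mask to one bool, mirroring the shuffle+andps reduction:
    // first AND the two 8-byte halves together, then AND the two surviving lanes.
    bool AllLanesTrue(const uint32_t lanes[4])
    {
        uint32_t half0 = lanes[0] & lanes[2];  // shuffle (1,0,3,2) swaps halves; AND pairs lanes 0 and 2
        uint32_t half1 = lanes[1] & lanes[3];  // ... and lanes 1 and 3
        return (half0 & half1) == 0xFFFFFFFFu; // last shuffle+AND, then test for all-ones
    }

    int main()
    {
        uint32_t allTrue[4]  = { 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu };
        uint32_t oneFalse[4] = { 0xFFFFFFFFu, 0xFFFFFFFFu, 0u,          0xFFFFFFFFu };
        assert(AllLanesTrue(allTrue) && !AllLanesTrue(oneFalse));
        return 0;
    }

For a TYP_SIMD8 (Vector2) operand only the final AND-and-test step is needed, which is why the code above guards the first shuffle with simdType != TYP_SIMD8.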
@@ -1437,7 +1443,18 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) } noway_assert(op2->isContained()); - int byteShiftCnt = (int) op2->gtIntCon.gtIconVal * genTypeSize(baseType); + unsigned int index = (unsigned int) op2->gtIntCon.gtIconVal; + unsigned int byteShiftCnt = index * genTypeSize(baseType); + + // In general we shouldn't have an index greater than or equal to the length of the vector. + // However, if we have an out-of-range access, under minOpts it will not be optimized + // away. The code will throw before we reach this point, but we still need to generate + // code. In that case, we will simply mask off the upper bits. + if (byteShiftCnt >= compiler->getSIMDVectorRegisterByteLength()) + { + byteShiftCnt &= (compiler->getSIMDVectorRegisterByteLength() - 1); + index = byteShiftCnt / genTypeSize(baseType); + } regNumber tmpReg = REG_NA; if (simdNode->gtRsvdRegs != RBM_NONE) @@ -1503,7 +1520,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) // In case of vector<short> we also need to sign extend the 16-bit value in targetReg // Vector<byte> - index/2 will give the index of the 16-bit value to extract. Shift right // by 8-bits if index is odd. In case of Vector<sbyte> also sign extend targetReg. - int index = (int) op2->gtIntCon.gtIconVal; + unsigned baseSize = genTypeSize(baseType); if (baseSize == 1) { @@ -1624,7 +1641,7 @@ CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) regNumber targetReg = simdNode->gtRegNum; assert(targetReg != REG_NA); var_types targetType = simdNode->TypeGet(); - assert(varTypeIsSIMD(targetType) || targetType == TYP_DOUBLE); + assert(varTypeIsSIMD(targetType)); // the following assert must hold. // supported only on vector2f/3f/4f right now diff --git a/src/jit/ssabuilder.cpp b/src/jit/ssabuilder.cpp index 1dc9d3907d..f6172d39db 100644 --- a/src/jit/ssabuilder.cpp +++ b/src/jit/ssabuilder.cpp @@ -857,6 +857,7 @@ void SsaBuilder::AddDefPoint(GenTree* tree, BasicBlock* blk) #endif // Record where the defn happens. 
m_pCompiler->lvaTable[lclNum].GetPerSsaData(defSsaNum)->m_defLoc.m_blk = blk; + m_pCompiler->lvaTable[lclNum].GetPerSsaData(defSsaNum)->m_defLoc.m_tree = tree; #ifdef SSA_FEATURE_USEDEF SsaVarName key(lclNum, defSsaNum); diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp index 05bfba7944..e3f8432dc7 100644 --- a/src/jit/stackfp.cpp +++ b/src/jit/stackfp.cpp @@ -2350,8 +2350,10 @@ void CodeGen::genCodeForTreeStackFP_SmpOp(GenTreePtr tree) genCodeForTreeStackFP_DONE(tree, op1->gtRegNum); return; } - case GT_MATH: + case GT_INTRINSIC: { + assert(Compiler::IsMathIntrinsic(tree)); + GenTreePtr op1 = tree->gtOp.gtOp1; // get tree into a register @@ -2375,8 +2377,8 @@ void CodeGen::genCodeForTreeStackFP_SmpOp(GenTreePtr tree) assert(mathIns[CORINFO_INTRINSIC_Sqrt] == INS_fsqrt); assert(mathIns[CORINFO_INTRINSIC_Abs ] == INS_fabs ); assert(mathIns[CORINFO_INTRINSIC_Round] == INS_frndint); - assert((unsigned)(tree->gtMath.gtMathFN) < sizeof(mathIns)/sizeof(mathIns[0])); - instGen(mathIns[tree->gtMath.gtMathFN]); + assert((unsigned)(tree->gtIntrinsic.gtIntrinsicId) < sizeof(mathIns)/sizeof(mathIns[0])); + instGen(mathIns[tree->gtIntrinsic.gtIntrinsicId]); // mark register that holds tree genCodeForTreeStackFP_DONE(tree, op1->gtRegNum); diff --git a/src/jit/target.h b/src/jit/target.h index ab09293595..60e482997d 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -26,6 +26,20 @@ #endif // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX)) /*****************************************************************************/ +// The following are human readable names for the target architectures +#if defined(_TARGET_X86_) + #define TARGET_READABLE_NAME "X86" +#elif defined(_TARGET_AMD64_) + #define TARGET_READABLE_NAME "AMD64" +#elif defined(_TARGET_ARM_) + #define TARGET_READABLE_NAME "ARM" +#elif defined(_TARGET_ARM64_) + #define TARGET_READABLE_NAME "ARM64" +#else + #error Unsupported or unset target architecture +#endif + +/*****************************************************************************/ // The following are intended to capture only those #defines that cannot be replaced // with static const members of Target #if defined(_TARGET_X86_) && defined(LEGACY_BACKEND) @@ -362,6 +376,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define FEATURE_MULTIREG_STRUCTS 0 // Support for passing and/or returning structs in more than one register + #define FEATURE_MULTIREG_STRUCT_ARGS 0 // Support for passing structs in more than one register + #define FEATURE_MULTIREG_STRUCT_RET 0 // Support for returning structs in more than one register + #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers #else @@ -409,8 +427,11 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_ALLFLOAT (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM3 | RBM_XMM4 | RBM_XMM5 | RBM_XMM6 | RBM_XMM7) #define RBM_ALLDOUBLE RBM_ALLFLOAT - #define RBM_FLT_CALLEE_SAVED (RBM_XMM6|RBM_XMM7) - #define RBM_FLT_CALLEE_TRASH (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5) + + // TODO-CQ: Currently we are following the x86 ABI for SSE2 registers. + // This should be reconsidered. 
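Editor's note: the target.h hunk continuing below makes every x86 XMM register caller-trash for RyuJIT (no floating-point callee-saved registers). Register sets throughout these files are plain bit masks, one bit per register; the values in the sketch below are illustrative stand-ins, not the real target.h encoding.

```cpp
#include <cstdio>

// Illustrative register-mask convention: one bit per register,
// sets composed with '|'. Values are made up for the example.
typedef unsigned regMaskTP;

const regMaskTP RBM_NONE     = 0;
const regMaskTP RBM_XMM6     = 1u << 6;
const regMaskTP RBM_ALLFLOAT = 0xFFu; // xmm0..xmm7

// "All float registers are trashed across a call, none are saved":
const regMaskTP RBM_FLT_CALLEE_SAVED = RBM_NONE;
const regMaskTP RBM_FLT_CALLEE_TRASH = RBM_ALLFLOAT;

int main()
{
    // Membership test: does a call clobber xmm6 under this convention?
    bool xmm6Trashed = (RBM_FLT_CALLEE_TRASH & RBM_XMM6) != 0;
    std::printf("xmm6 trashed: %d\n", xmm6Trashed); // prints: 1
    return 0;
}
```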
+ #define RBM_FLT_CALLEE_SAVED RBM_NONE + #define RBM_FLT_CALLEE_TRASH RBM_ALLFLOAT #define REG_VAR_ORDER_FLT REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 #define REG_FLT_CALLEE_SAVED_FIRST REG_XMM6 @@ -448,7 +469,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI) #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX) - #define RBM_CALLEE_TRASH_NOGC 0 #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) @@ -528,9 +548,16 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_JUMP_THUNK_PARAM REG_EAX #define RBM_JUMP_THUNK_PARAM RBM_EAX +#if NOGC_WRITE_BARRIERS #define REG_WRITE_BARRIER REG_EDX #define RBM_WRITE_BARRIER RBM_EDX + // We don't allow using ebp as a source register. Maybe we should only prevent this for ETW_EBP_FRAMED (but that is always set right now). + #define RBM_WRITE_BARRIER_SRC (RBM_EAX|RBM_ECX|RBM_EBX|RBM_ESI|RBM_EDI) + + #define RBM_CALLEE_TRASH_NOGC RBM_NONE +#endif // NOGC_WRITE_BARRIERS + // IL stub's secret parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM) #define REG_SECRET_STUB_PARAM REG_EAX #define RBM_SECRET_STUB_PARAM RBM_EAX @@ -661,6 +688,19 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set +#ifdef UNIX_AMD64_ABI + #define FEATURE_MULTIREG_STRUCTS 1 // Support for passing and/or returning structs in more than one register + #define FEATURE_MULTIREG_STRUCT_ARGS 1 // Support for passing structs in more than one register + #define FEATURE_MULTIREG_STRUCT_RET 1 // Support for returning structs in more than one register + #define FEATURE_STRUCT_CLASSIFIER 1 // Uses a classifier function to determine if structs are passed/returned in more than one register + #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register + #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register +#else // !UNIX_AMD64_ABI + #define FEATURE_MULTIREG_STRUCTS 0 // Support for passing and/or returning structs in more than one register + #define FEATURE_MULTIREG_STRUCT_ARGS 0 // Support for passing structs in more than one register + #define FEATURE_MULTIREG_STRUCT_RET 0 // Support for returning structs in more than one register +#endif // !UNIX_AMD64_ABI + #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 0 // We DO-NOT have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers #else @@ -687,8 +727,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_FP_LAST REG_XMM15 #define FIRST_FP_ARGREG REG_XMM0 #define LAST_FP_ARGREG REG_XMM3 - #define VOLATILE_FP (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM3 | RBM_XMM4 | RBM_XMM5) - #define PRESERVED_FP (RBM_XMM8 | RBM_XMM9 | RBM_XMM10 | RBM_XMM11 | RBM_XMM12 | RBM_XMM13 | RBM_XMM14 | RBM_XMM15) + #define REGNUM_BITS 6 // number of bits in a REG_* #define TINY_REGNUM_BITS 6 // number used in a tiny instrdesc (same) #define REGMASK_BITS 32 // number of bits in a REGNUM_MASK @@ -1073,6 +1112,13 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define 
FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define FEATURE_MULTIREG_STRUCTS 1 // Support for passing and/or returning structs in more than one register (including HFA support) + #define FEATURE_MULTIREG_STRUCT_ARGS 1 // Support for passing structs in more than one register (including passing HFAs) + #define FEATURE_MULTIREG_STRUCT_RET 1 // Support for returning structs in more than one register (including HFA returns) + #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register + #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is an HFA of 4 doubles) + #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles) + #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 0 // We DO-NOT have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers #else @@ -1080,8 +1126,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #endif #define USER_ARGS_COME_LAST 1 #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really - // need to track stack depth, but this is currently necessary to - // get GC information reported at call sites. + // need to track stack depth, but this is currently necessary to get GC information reported at call sites. #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses. #define FEATURE_EH_FUNCLETS 1 @@ -1379,6 +1424,12 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define FEATURE_MULTIREG_STRUCTS 1 // Support for passing and/or returning structs in more than one register + #define FEATURE_MULTIREG_STRUCT_ARGS 1 // Support for passing structs in more than one register + #define FEATURE_MULTIREG_STRUCT_RET 0 // Support for returning structs in more than one register + #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register + #define MAX_PASS_MULTIREG_BYTES 16 // Maximum size of a struct that could be passed in more than one register + #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers #else @@ -1386,8 +1437,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #endif #define USER_ARGS_COME_LAST 1 #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. 
For both ARM and AMD64, the frame size is fixed, so we don't really - // need to track stack depth, but this is currently necessary to - // get GC information reported at call sites. + // need to track stack depth, but this is currently necessary to get GC information reported at call sites. #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses. #define FEATURE_EH_FUNCLETS 1 diff --git a/src/jit/typelist.h b/src/jit/typelist.h index ec33e7b1f4..86ad095ad2 100644 --- a/src/jit/typelist.h +++ b/src/jit/typelist.h @@ -54,7 +54,7 @@ DEF_TP(DOUBLE ,"double" , TYP_DOUBLE, TI_DOUBLE,8, 8, 8, 2, 8, VTF_FLT, DEF_TP(REF ,"ref" , TYP_REF, TI_REF, PS,GCS,GCS, PST,PS, VTF_ANY|VTF_GCR|VTF_I,TYPE_REF_PTR) DEF_TP(BYREF ,"byref" , TYP_BYREF, TI_ERROR,PS,BRS,BRS, PST,PS, VTF_ANY|VTF_BYR|VTF_I,TYPE_REF_BYR) DEF_TP(ARRAY ,"array" , TYP_REF, TI_REF, PS,GCS,GCS, PST,PS, VTF_ANY|VTF_GCR|VTF_I,TYPE_REF_PTR) -DEF_TP(STRUCT ,"struct" , TYP_STRUCT, TI_STRUCT,0, 0, 0, 1, 4, VTF_ANY, TYPE_REF_STC) +DEF_TP(STRUCT ,"struct" , TYP_STRUCT, TI_STRUCT,0, 0, 0, 1, 4, VTF_S, TYPE_REF_STC) DEF_TP(BLK ,"blk" , TYP_BLK, TI_ERROR, 0, 0, 0, 1, 4, VTF_ANY, 0 ) // blob of memory DEF_TP(LCLBLK ,"lclBlk" , TYP_LCLBLK, TI_ERROR, 0, 0, 0, 1, 4, VTF_ANY, 0 ) // preallocated memory for locspace @@ -64,9 +64,10 @@ DEF_TP(FNC ,"function", TYP_FNC, TI_ERROR, 0, PS, PS, 0, 0, VTF_ANY|VT #ifdef FEATURE_SIMD // Amd64: The size and alignment of SIMD vector varies at JIT time based on whether target arch supports AVX or SSE2. -DEF_TP(SIMD12 ,"simd12" , TYP_SIMD16, TI_STRUCT,12,12, 16, 4,16, VTF_ANY, TYPE_REF_STC) -DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, TI_STRUCT,16,16, 16, 4,16, VTF_ANY, TYPE_REF_STC) -DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, TI_STRUCT,32,32, 32, 8,16, VTF_ANY, TYPE_REF_STC) +DEF_TP(SIMD8 ,"simd8" , TYP_SIMD8, TI_STRUCT, 8, 8, 8, 2, 8, VTF_S, TYPE_REF_STC) +DEF_TP(SIMD12 ,"simd12" , TYP_SIMD12, TI_STRUCT,12,16, 16, 4,16, VTF_S, TYPE_REF_STC) +DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, TI_STRUCT,16,16, 16, 4,16, VTF_S, TYPE_REF_STC) +DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, TI_STRUCT,32,32, 32, 8,16, VTF_S, TYPE_REF_STC) #endif // FEATURE_SIMD DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 ) diff --git a/src/jit/unwindamd64.cpp b/src/jit/unwindamd64.cpp index b93c15d223..a11cc1a52d 100644 --- a/src/jit/unwindamd64.cpp +++ b/src/jit/unwindamd64.cpp @@ -479,7 +479,9 @@ void Compiler::unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode) // Prepend the unwindHeader onto the unwind codes assert(func->unwindCodeSlot >= offsetof(UNWIND_INFO, UnwindCode)); - UNWIND_INFO * pHeader = (UNWIND_INFO*)&func->unwindCodes[func->unwindCodeSlot -= offsetof(UNWIND_INFO, UnwindCode)]; + + func->unwindCodeSlot -= offsetof(UNWIND_INFO, UnwindCode); + UNWIND_INFO * pHeader = (UNWIND_INFO*)&func->unwindCodes[func->unwindCodeSlot]; memcpy(pHeader, &func->unwindHeader, offsetof(UNWIND_INFO, UnwindCode)); unwindCodeBytes = sizeof(func->unwindCodes) - func->unwindCodeSlot; diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp index a5386ff999..0da479d684 100644 --- a/src/jit/utils.cpp +++ b/src/jit/utils.cpp @@ -1364,9 +1364,9 @@ void HelperCallProperties::init() case CORINFO_HELP_VERIFICATION: case CORINFO_HELP_RNGCHKFAIL: case CORINFO_HELP_THROWDIVZERO: -#ifndef RYUJIT_CTPBUILD +#if COR_JIT_EE_VERSION > 460 case 
CORINFO_HELP_THROWNULLREF: -#endif +#endif // COR_JIT_EE_VERSION case CORINFO_HELP_THROW: case CORINFO_HELP_RETHROW: diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp index 9476e5472f..8fbdca27a6 100644 --- a/src/jit/valuenum.cpp +++ b/src/jit/valuenum.cpp @@ -829,6 +829,13 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) case TYP_ARRAY: return VNForNull(); case TYP_STRUCT: +#ifdef FEATURE_SIMD + // TODO-CQ: Improve value numbering for SIMD types. + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: + case TYP_SIMD32: +#endif // FEATURE_SIMD return VNForZeroMap(); // Recursion! // These should be unreached. @@ -1086,6 +1093,10 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V { return VNOneForType(typ); } + if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) || (arg1VN == VNForNull() && IsKnownNonNull(arg0VN))) + { + return VNZeroForType(typ); + } break; case GT_NE: // (x != x) => false (unless x is NaN) @@ -1093,6 +1104,10 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V { return VNZeroForType(typ); } + if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) || (arg1VN == VNForNull() && IsKnownNonNull(arg0VN))) + { + return VNOneForType(typ); + } break; default: @@ -1900,6 +1915,8 @@ bool ValueNumStore::CanEvalForConstantArgs(VNFunc vnf) { case VNF_Cast: // We can evaluate these. return true; + case VNF_ObjGetType: + return false; default: return false; } @@ -2096,16 +2113,22 @@ ValueNum ValueNumStore::VNApplySelectors(ValueNumKind vnk, ValueNum map, FieldSe // Otherwise, is a real field handle. CORINFO_FIELD_HANDLE fldHnd = fieldSeq->m_fieldHnd; - CORINFO_CLASS_HANDLE structType = NULL; + CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE; ValueNum fldHndVN = VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL); noway_assert(fldHnd != nullptr); - CorInfoType fieldCit = m_pComp->info.compCompHnd->getFieldType(fldHnd, &structType); + CorInfoType fieldCit = m_pComp->info.compCompHnd->getFieldType(fldHnd, &structHnd); var_types fieldType = JITtype2varType(fieldCit); size_t structSize = 0; - if (fieldType == TYP_STRUCT) + if (varTypeIsStruct(fieldType)) { - structSize = m_pComp->info.compCompHnd->getClassSize(structType); + structSize = m_pComp->info.compCompHnd->getClassSize(structHnd); + // We do not normalize the type field accesses during importation unless they + // are used in a call, return or assignment. + if ((fieldType == TYP_STRUCT) && (structSize <= m_pComp->largestEnregisterableStructSize())) + { + fieldType = m_pComp->impNormStructType(structHnd); + } } if (wbFinalStructSize != nullptr) { @@ -2119,7 +2142,7 @@ ValueNum ValueNumStore::VNApplySelectors(ValueNumKind vnk, ValueNum map, FieldSe const char* modName; const char* fldName = m_pComp->eeGetFieldName(fldHnd, &modName); printf(" VNForHandle(Fseq[%s]) is " STR_VN "%x, fieldType is %s", fldName, fldHndVN, varTypeName(fieldType)); - if (fieldType == TYP_STRUCT) + if (varTypeIsStruct(fieldType)) { printf(", size = %d", structSize); } @@ -2129,7 +2152,7 @@ ValueNum ValueNumStore::VNApplySelectors(ValueNumKind vnk, ValueNum map, FieldSe if (fieldSeq->m_next != nullptr) { - ValueNum newMap = VNForMapSelect(vnk, TYP_STRUCT, map, fldHndVN); + ValueNum newMap = VNForMapSelect(vnk, fieldType, map, fldHndVN); return VNApplySelectors(vnk, newMap, fieldSeq->m_next, wbFinalStructSize); } else // end of fieldSeq @@ -2159,7 +2182,7 @@ ValueNum ValueNumStore::VNApplySelectorsTypeCheck(ValueNum elem, var_types i size_t elemTypSize = (elemTyp == TYP_STRUCT) ? 
elemStructSize : genTypeSize(elemTyp); size_t indTypeSize = genTypeSize(indType); - if ((indType == TYP_REF) && (elemTyp == TYP_STRUCT)) + if ((indType == TYP_REF) && (varTypeIsStruct(elemTyp))) { // indType is TYP_REF and elemTyp is TYP_STRUCT // @@ -2175,7 +2198,7 @@ ValueNum ValueNumStore::VNApplySelectorsTypeCheck(ValueNum elem, var_types i elem = VNForExpr(indType); JITDUMP(" *** Mismatched types in VNApplySelectorsTypeCheck (reading beyond the end)\n"); } - else if (indType == TYP_STRUCT) + else if (varTypeIsStruct(indType)) { // indType is TYP_STRUCT @@ -2212,7 +2235,7 @@ ValueNum ValueNumStore::VNApplySelectorsAssignTypeCoerce(ValueNum elem, var_ { // We are trying to write an 'elem' of type 'elemType' using 'indType' store - if (indType == TYP_STRUCT) + if (varTypeIsStruct(indType)) { // return a new unique value number elem = VNForExpr(indType); @@ -2404,19 +2427,21 @@ ValueNum ValueNumStore::VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB) { - if (opB->OperGet() == GT_CNS_INT && opB->gtIntCon.gtFieldSeq != NULL) - { - return ExtendPtrVN(opA, opB->gtIntCon.gtFieldSeq); - } - else + if (opB->OperGet() == GT_CNS_INT) { - return NoVN; + FieldSeqNode* fldSeq = opB->gtIntCon.gtFieldSeq; + if ((fldSeq != NULL) && (fldSeq != FieldSeqStore::NotAField())) + { + return ExtendPtrVN(opA, opB->gtIntCon.gtFieldSeq); + } } + return NoVN; } ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fldSeq) { ValueNum res = NoVN; + assert(fldSeq != FieldSeqStore::NotAField()); ValueNum opAvnWx = opA->gtVNPair.GetLiberal(); assert(VNIsValid(opAvnWx)); @@ -2699,6 +2724,14 @@ bool ValueNumStore::IsVNConstant(ValueNum vn) } } +bool ValueNumStore::IsVNInt32Constant(ValueNum vn) +{ + if (!IsVNConstant(vn)) + return false; + + return TypeOfVN(vn) == TYP_INT; +} + unsigned ValueNumStore::GetHandleFlags(ValueNum vn) { assert(IsVNHandle(vn)); @@ -2716,6 +2749,50 @@ bool ValueNumStore::IsVNHandle(ValueNum vn) return c->m_attribs == CEA_Handle; } +bool ValueNumStore::IsVNConstantBound(ValueNum vn) +{ + // Do we have "var < 100"? + if (vn == NoVN) return false; + + VNFuncApp funcAttr; + if (!GetVNFunc(vn, &funcAttr)) + { + return false; + } + if (funcAttr.m_func != (VNFunc)GT_LE && funcAttr.m_func != (VNFunc)GT_GE && + funcAttr.m_func != (VNFunc)GT_LT && funcAttr.m_func != (VNFunc)GT_GT) + { + return false; + } + + return IsVNInt32Constant(funcAttr.m_args[0]) != IsVNInt32Constant(funcAttr.m_args[1]); +} + +void ValueNumStore::GetConstantBoundInfo(ValueNum vn, ConstantBoundInfo* info) +{ + assert(IsVNConstantBound(vn)); + assert(info); + + // Do we have var < 100? + VNFuncApp funcAttr; + GetVNFunc(vn, &funcAttr); + + bool isOp1Const = IsVNInt32Constant(funcAttr.m_args[1]); + + if (isOp1Const) + { + info->cmpOper = funcAttr.m_func; + info->cmpOpVN = funcAttr.m_args[0]; + info->constVal = GetConstantInt32(funcAttr.m_args[1]); + } + else + { + info->cmpOper = GenTree::SwapRelop((genTreeOps)funcAttr.m_func); + info->cmpOpVN = funcAttr.m_args[1]; + info->constVal = GetConstantInt32(funcAttr.m_args[0]); + } +} + bool ValueNumStore::IsVNArrLenBound(ValueNum vn) { // Do we have "var < a.len"? 
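Editor's note: GetConstantBoundInfo above normalizes a relop with exactly one 32-bit constant operand so the variable ends up on the left, swapping the comparison when the constant is the first operand (so `100 > x` becomes `x < 100`). A standalone sketch of that normalization, with a hypothetical operator enum in place of genTreeOps and GenTree::SwapRelop:

```cpp
#include <cassert>
#include <cstdio>

enum CmpOper { CMP_LT, CMP_LE, CMP_GT, CMP_GE };

// Mirror image of a relation: (c OP x) holds exactly when (x swap(OP) c) holds.
static CmpOper swapRelop(CmpOper op)
{
    switch (op)
    {
        case CMP_LT: return CMP_GT;
        case CMP_LE: return CMP_GE;
        case CMP_GT: return CMP_LT;
        case CMP_GE: return CMP_LE;
    }
    assert(!"unknown relop");
    return op;
}

struct ConstantBound
{
    CmpOper cmpOper;  // comparison with the variable on the left
    int     constVal; // the constant bound
};

// constIsSecondOperand says which side held the constant; exactly one
// side must be a constant, as IsVNConstantBound requires.
static ConstantBound getConstantBound(CmpOper oper, int constVal, bool constIsSecondOperand)
{
    ConstantBound info;
    info.cmpOper  = constIsSecondOperand ? oper : swapRelop(oper);
    info.constVal = constVal;
    return info;
}

int main()
{
    // "100 > x" normalizes to "x < 100".
    ConstantBound b = getConstantBound(CMP_GT, 100, /*constIsSecondOperand*/ false);
    std::printf("oper=%d constVal=%d\n", (int)b.cmpOper, b.constVal); // oper=0 (CMP_LT), constVal=100
    return 0;
}
```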
@@ -2890,11 +2967,13 @@ bool ValueNumStore::IsVNArrLen(ValueNum vn) } -ValueNum ValueNumStore::EvalMathFunc(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN) +ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN) { assert(arg0VN == VNNormVal(arg0VN)); - if (IsVNConstant(arg0VN)) + if (IsVNConstant(arg0VN) && Compiler::IsTargetIntrinsic(gtMathFN)) { + // If the math intrinsic is not implemented by target-specific instructions, such as implemented + // by user calls, then don't do constant folding on it. This minimizes precision loss. // I *may* need separate tracks for the double/float -- if the intrinsic funcs have overloads for these. double arg0Val = GetConstantDouble(arg0VN); @@ -2902,19 +2981,19 @@ ValueNum ValueNumStore::EvalMathFunc(var_types typ, CorInfoIntrinsics gtMathFN, switch (gtMathFN) { case CORINFO_INTRINSIC_Sin: - res = sin(arg0Val); + res = sin(arg0Val); break; case CORINFO_INTRINSIC_Cos: - res = cos(arg0Val); + res = cos(arg0Val); break; case CORINFO_INTRINSIC_Sqrt: - res = sqrt(arg0Val); + res = sqrt(arg0Val); break; case CORINFO_INTRINSIC_Abs: res = fabs(arg0Val); // The result and params are doubles. break; case CORINFO_INTRINSIC_Round: - res = (arg0Val > 0.0 ? floor(arg0Val + 0.5) : ceil(arg0Val - 0.5)); + res = (arg0Val > 0.0 ? floor(arg0Val + 0.5) : ceil(arg0Val - 0.5)); break; default: unreached(); // the above are the only math intrinsics at the time of this writing. @@ -2937,50 +3016,112 @@ ValueNum ValueNumStore::EvalMathFunc(var_types typ, CorInfoIntrinsics gtMathFN, } else { + assert(typ == TYP_DOUBLE + || typ == TYP_FLOAT + || (typ == TYP_INT && gtMathFN == CORINFO_INTRINSIC_Round)); + VNFunc vnf = VNF_Boundary; switch (gtMathFN) { - case CORINFO_INTRINSIC_Sin: - vnf = VNF_Sin; - break; - case CORINFO_INTRINSIC_Cos: - vnf = VNF_Cos; - break; - case CORINFO_INTRINSIC_Sqrt: - vnf = VNF_Sqrt; - break; - case CORINFO_INTRINSIC_Abs: - vnf = VNF_Abs; - break; - case CORINFO_INTRINSIC_Round: - if (typ == TYP_DOUBLE) - { - vnf = VNF_RoundDouble; - } - else if (typ == TYP_FLOAT) - { - vnf = VNF_RoundFloat; - } - else if (typ == TYP_INT) - { - vnf = VNF_RoundInt; - } - else - { - noway_assert(!"Invalid INTRINSIC_Round"); - } - break; - default: - unreached(); // the above are the only math intrinsics at the time of this writing. 
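Editor's note: the constant-folding branch above only folds when the intrinsic maps to a target instruction, and it evaluates Round as floor(x + 0.5) for positive inputs and ceil(x - 0.5) otherwise, i.e. round half away from zero. A self-contained sketch of just that rounding rule:

```cpp
#include <cmath>
#include <cstdio>

// Round half away from zero, matching the folding used for
// CORINFO_INTRINSIC_Round on constant arguments.
static double roundHalfAwayFromZero(double x)
{
    return (x > 0.0) ? std::floor(x + 0.5) : std::ceil(x - 0.5);
}

int main()
{
    std::printf("%g %g %g\n",
                roundHalfAwayFromZero(2.5),   // prints  3
                roundHalfAwayFromZero(-2.5),  // prints -3
                roundHalfAwayFromZero(2.4));  // prints  2
    return 0;
}
```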
+ case CORINFO_INTRINSIC_Sin: + vnf = VNF_Sin; + break; + case CORINFO_INTRINSIC_Cos: + vnf = VNF_Cos; + break; + case CORINFO_INTRINSIC_Sqrt: + vnf = VNF_Sqrt; + break; + case CORINFO_INTRINSIC_Abs: + vnf = VNF_Abs; + break; + case CORINFO_INTRINSIC_Round: + if (typ == TYP_DOUBLE) + { + vnf = VNF_RoundDouble; + } + else if (typ == TYP_FLOAT) + { + vnf = VNF_RoundFloat; + } + else if (typ == TYP_INT) + { + vnf = VNF_RoundInt; + } + else + { + noway_assert(!"Invalid INTRINSIC_Round"); + } + break; + case CORINFO_INTRINSIC_Cosh: + vnf = VNF_Cosh; + break; + case CORINFO_INTRINSIC_Sinh: + vnf = VNF_Sinh; + break; + case CORINFO_INTRINSIC_Tan: + vnf = VNF_Tan; + break; + case CORINFO_INTRINSIC_Tanh: + vnf = VNF_Tanh; + break; + case CORINFO_INTRINSIC_Asin: + vnf = VNF_Asin; + break; + case CORINFO_INTRINSIC_Acos: + vnf = VNF_Acos; + break; + case CORINFO_INTRINSIC_Atan: + vnf = VNF_Atan; + break; + case CORINFO_INTRINSIC_Log10: + vnf = VNF_Log10; + break; + case CORINFO_INTRINSIC_Exp: + vnf = VNF_Exp; + break; + case CORINFO_INTRINSIC_Ceiling: + vnf = VNF_Ceiling; + break; + case CORINFO_INTRINSIC_Floor: + vnf = VNF_Floor; + break; + default: + unreached(); // the above are the only math intrinsics at the time of this writing. } - assert(typ == TYP_DOUBLE - || typ == TYP_FLOAT - || (typ == TYP_INT && gtMathFN == CORINFO_INTRINSIC_Round)); return VNForFunc(typ, vnf, arg0VN); } } +ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN, ValueNum arg1VN) +{ + assert(varTypeIsFloating(typ)); + assert(arg0VN == VNNormVal(arg0VN)); + assert(arg1VN == VNNormVal(arg1VN)); + + VNFunc vnf = VNF_Boundary; + + // Currently, none of the binary math intrinsic are implemented by target-specific instructions. + // To minimize precision loss, do not do constant folding on them. + + switch (gtMathFN) + { + case CORINFO_INTRINSIC_Atan2: + vnf = VNF_Atan2; + break; + + case CORINFO_INTRINSIC_Pow: + vnf = VNF_Pow; + break; + + default: + unreached(); // the above are the only binary math intrinsics at the time of this writing. + } + + return VNForFunc(typ, vnf, arg0VN, arg1VN); +} + bool ValueNumStore::IsVNFunc(ValueNum vn) { if (vn == NoVN) @@ -3180,6 +3321,12 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) printf("byrefVal"); break; case TYP_STRUCT: +#ifdef FEATURE_SIMD + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: + case TYP_SIMD32: +#endif // FEATURE_SIMD printf("structVal"); break; @@ -4247,7 +4394,7 @@ ValueNum Compiler::fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned i if (verbose) { var_types elemTyp = DecodeElemType(elemClsHnd); - if (elemTyp == TYP_STRUCT) + if (varTypeIsStruct(elemTyp)) { printf(" Array map %s[]\n", eeGetClassName(elemClsHnd)); } @@ -4377,6 +4524,241 @@ void Compiler::fgValueNumberTreeConst(GenTreePtr tree) } } +//------------------------------------------------------------------------ +// fgValueNumberBlockAssignment: Perform value numbering for block assignments. +// +// Arguments: +// tree - the block assignment to be value numbered. +// evalAsgLhsInd - true iff we should value number the LHS of the assignment. +// +// Return Value: +// None. +// +// Assumptions: +// 'tree' must be a block assignment (GT_INITBLK, GT_COPYBLK, GT_COPYOBJ). + +void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd) +{ +#ifdef DEBUG + // Sometimes we query the heap ssa map, and need a dummy location for the ignored result. 
+ unsigned heapSsaNum; +#endif + + if (tree->OperGet() == GT_INITBLK) + { + GenTreeLclVarCommon* lclVarTree; + bool isEntire; + + if (tree->DefinesLocal(this, &lclVarTree, &isEntire)) + { + assert(lclVarTree->gtFlags & GTF_VAR_DEF); + // Should not have been recorded as updating the heap. + assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum)); + + unsigned lclNum = lclVarTree->GetLclNum(); + + // Ignore vars that we excluded from SSA (for example, because they're address-exposed). They don't have SSA names + // in which to store VN's on defs. We'll yield unique VN's when we read from them. + if (!fgExcludeFromSsa(lclNum)) + { + unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree); + + ValueNum initBlkVN = ValueNumStore::NoVN; + GenTreePtr initConst = tree->gtGetOp1()->gtGetOp2(); + if (isEntire && initConst->OperGet() == GT_CNS_INT) + { + unsigned initVal = 0xFF & (unsigned) initConst->AsIntConCommon()->IconValue(); + if (initVal == 0) + { + initBlkVN = vnStore->VNZeroForType(lclVarTree->TypeGet()); + } + } + ValueNum lclVarVN = (initBlkVN != ValueNumStore::NoVN) ? initBlkVN + : vnStore->VNForExpr(var_types(lvaTable[lclNum].lvType)); + + lvaTable[lclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair.SetBoth(lclVarVN); +#ifdef DEBUG + if (verbose) + { + printf("N%03u ", tree->gtSeqNum); + Compiler::printTreeID(tree); + printf(" "); + gtDispNodeName(tree); + printf(" V%02u/%d => ",lclNum, lclDefSsaNum); + vnPrint(lclVarVN, 1); + printf("\n"); + } +#endif // DEBUG + } + // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want to be + // able to give VN's to. + tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); + } + else + { + // For now, arbitrary side effect on Heap. + // TODO-CQ: Why not be complete, and get this case right? + fgMutateHeap(tree DEBUGARG("INITBLK - non local")); + } + } + else + { + assert(tree->OperIsCopyBlkOp()); + // TODO-Cleanup: We should factor things so that we uniformly rely on "PtrTo" VN's, and + // the heap cases can be shared with assignments. + GenTreeLclVarCommon* lclVarTree = NULL; + bool isEntire = false; + // Note that we don't care about exceptions here, since we're only using the values + // to perform an assignment (which happens after any exceptions are raised...) + + if (tree->DefinesLocal(this, &lclVarTree, &isEntire)) + { + // Should not have been recorded as updating the heap. + assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum)); + + unsigned lhsLclNum = lclVarTree->GetLclNum(); + LclVarDsc* rhsVarDsc = &lvaTable[lhsLclNum]; + // If it's excluded from SSA, don't need to do anything. + if (!fgExcludeFromSsa(lhsLclNum)) + { + unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree); + + // For addr-of-local expressions, lib/cons shouldn't matter. + assert(tree->gtOp.gtOp1->gtOp.gtOp1->gtVNPair.BothEqual()); + ValueNum lhsAddrVN = tree->gtOp.gtOp1->gtOp.gtOp1->GetVN(VNK_Liberal); + + // Unpack the PtrToLoc value number of the address. + assert(vnStore->IsVNFunc(lhsAddrVN)); + VNFuncApp lhsAddrFuncApp; + vnStore->GetVNFunc(lhsAddrVN, &lhsAddrFuncApp); + assert(lhsAddrFuncApp.m_func == VNF_PtrToLoc); + assert(vnStore->IsVNConstant(lhsAddrFuncApp.m_args[0]) && vnStore->ConstantValue<unsigned>(lhsAddrFuncApp.m_args[0]) == lhsLclNum); + FieldSeqNode* lhsFldSeq = vnStore->FieldSeqVNToFieldSeq(lhsAddrFuncApp.m_args[1]); + + // Now we need to get the proper RHS. 
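Editor's note: in the INITBLK case above, only a whole-local init with the constant byte 0 is given the canonical zero value number for the local's type; any other pattern gets a fresh, opaque value. A compressed sketch of that decision, with a stand-in enum for the value number:

```cpp
#include <cstdio>

// Stand-in for the two outcomes of value numbering an INITBLK def.
enum class InitBlkVN { ZeroOfType, Unique };

// Sketch of the rule: an entire-local init with a constant fill byte of 0
// (e.g. "s = default(SomeStruct)") yields the type's zero VN; a partial
// init or a nonzero fill byte yields a new unique VN.
static InitBlkVN valueNumberInitBlk(bool definesEntireLocal, bool initIsConstant, unsigned fillByte)
{
    if (definesEntireLocal && initIsConstant && ((fillByte & 0xFF) == 0))
    {
        return InitBlkVN::ZeroOfType;
    }
    return InitBlkVN::Unique;
}

int main()
{
    std::printf("%d %d\n",
                (int)valueNumberInitBlk(true, true, 0x00),  // 0 = ZeroOfType
                (int)valueNumberInitBlk(true, true, 0xCD)); // 1 = Unique
    return 0;
}
```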
+ GenTreePtr srcAddr = tree->gtOp.gtOp1->gtOp.gtOp2; + VNFuncApp srcAddrFuncApp; + GenTreeLclVarCommon* rhsLclVarTree = nullptr; + FieldSeqNode* rhsFldSeq = nullptr; + ValueNumPair rhsVNPair; +#ifdef DEBUG + bool isNewUniq = false; +#endif + if (srcAddr->IsLocalAddrExpr(this, &rhsLclVarTree, &rhsFldSeq)) + { + unsigned rhsLclNum = rhsLclVarTree->GetLclNum(); + LclVarDsc* rhsVarDsc = &lvaTable[rhsLclNum]; + if (fgExcludeFromSsa(rhsLclNum) || rhsFldSeq == FieldSeqStore::NotAField()) + { + rhsVNPair.SetBoth(vnStore->VNForExpr(lclVarTree->TypeGet())); +#ifdef DEBUG + isNewUniq = true; +#endif + } + else + { + rhsVNPair = lvaTable[rhsLclVarTree->GetLclNum()].GetPerSsaData(rhsLclVarTree->GetSsaNum())->m_vnPair; + var_types indType = rhsLclVarTree->TypeGet(); + + rhsVNPair = vnStore->VNPairApplySelectors(rhsVNPair, rhsFldSeq, indType); + } + } + else if (vnStore->GetVNFunc(vnStore->VNNormVal(srcAddr->gtVNPair.GetLiberal()), &srcAddrFuncApp)) + { + if (srcAddrFuncApp.m_func == VNF_PtrToStatic) + { + var_types indType = lclVarTree->TypeGet(); + ValueNum fieldSeqVN = srcAddrFuncApp.m_args[0]; + + FieldSeqNode* zeroOffsetFldSeq = nullptr; + if (GetZeroOffsetFieldMap()->Lookup(srcAddr, &zeroOffsetFldSeq)) + { + fieldSeqVN = vnStore->FieldSeqVNAppend(fieldSeqVN, vnStore->VNForFieldSeq(zeroOffsetFldSeq)); + } + + FieldSeqNode* fldSeqForStaticVar = vnStore->FieldSeqVNToFieldSeq(fieldSeqVN); + + if (fldSeqForStaticVar != FieldSeqStore::NotAField()) + { + // We model statics as indices into the heap variable. + ValueNum selectedStaticVar; + size_t structSize = 0; + selectedStaticVar = vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, &structSize); + selectedStaticVar = vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, indType, structSize); + + rhsVNPair.SetLiberal(selectedStaticVar); + rhsVNPair.SetConservative(vnStore->VNForExpr(indType)); + } + else + { + JITDUMP(" *** Missing field sequence info for COPYBLK\n"); + rhsVNPair.SetBoth(vnStore->VNForExpr(indType)); // a new unique value number + } + } + else if (srcAddrFuncApp.m_func == VNF_PtrToArrElem) + { + ValueNum elemLib = fgValueNumberArrIndexVal(nullptr, &srcAddrFuncApp, vnStore->VNForEmptyExcSet()); + rhsVNPair.SetLiberal(elemLib); + rhsVNPair.SetConservative(vnStore->VNForExpr(lclVarTree->TypeGet())); + } + else + { + rhsVNPair.SetBoth(vnStore->VNForExpr(lclVarTree->TypeGet())); +#ifdef DEBUG + isNewUniq = true; +#endif + } + } + else + { + rhsVNPair.SetBoth(vnStore->VNForExpr(lclVarTree->TypeGet())); +#ifdef DEBUG + isNewUniq = true; +#endif + } + + ValueNumPair newRhsVNPair; + if (lhsFldSeq != nullptr && isEntire) + { + // This can occur in for structs with one field, itself of a struct type. + // We won't promote these. + // TODO-Cleanup: decide what exactly to do about this. + // Always treat them as maps, making them use/def, or reconstitute the + // map view here? 
+ newRhsVNPair.SetBoth(vnStore->VNForExpr(TYP_STRUCT)); + } + else + { + ValueNumPair oldLhsVNPair = lvaTable[lhsLclNum].GetPerSsaData(lclVarTree->GetSsaNum())->m_vnPair; + newRhsVNPair = vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lhsFldSeq, rhsVNPair, lclVarTree->TypeGet()); + } + lvaTable[lhsLclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = vnStore->VNPNormVal(newRhsVNPair); +#ifdef DEBUG + if (verbose) + { + printf("Tree "); + Compiler::printTreeID(tree); + printf(" assigned VN to local var V%02u/%d: ", + lhsLclNum, lclDefSsaNum); + if (isNewUniq) + printf("new uniq "); + vnpPrint(newRhsVNPair, 1); + printf("\n"); + } +#endif // DEBUG + } + } + else + { + // For now, arbitrary side effect on Heap. + // TODO-CQ: Why not be complete, and get this case right? + fgMutateHeap(tree DEBUGARG("COPYBLK - non local")); + } + // Copyblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want to be + // able to give VN's to. + tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); + } +} void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) { @@ -4471,7 +4853,7 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) // Case 2) Local variables that represent structs which are assigned using CpBlk. GenTree* nextNode = lcl->gtNext; assert((nextNode->gtOper == GT_ADDR && nextNode->gtOp.gtOp1 == lcl) || - lcl->TypeGet() == TYP_STRUCT); + varTypeIsStruct(lcl->TypeGet())); lcl->gtVNPair.SetBoth(vnStore->VNForExpr(lcl->TypeGet())); } assert(lcl->gtVNPair.BothDefined()); @@ -4790,24 +5172,34 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) if (lclDefSsaNum != SsaConfig::RESERVED_SSA_NUM) { ValueNumPair newLhsVNPair; - // We should never have a null field sequence here. - assert(lclFld->gtFieldSeq != NULL); - if (lclFld->gtFieldSeq == FieldSeqStore::NotAField()) + // Is this a full definition? + if ((lclFld->gtFlags & GTF_VAR_USEASG) == 0) { - // We don't know what field this represents. Assign a new VN to the whole variable - // (since we may be writing to an unknown portion of it.) - newLhsVNPair.SetBoth(vnStore->VNForExpr(lvaGetActualType(lclFld->gtLclNum))); + assert(!lclFld->IsPartialLclFld(this)); + assert(rhsVNPair.GetLiberal() != ValueNumStore::NoVN); + newLhsVNPair = rhsVNPair; } else { - // We do know the field sequence. - // The "lclFld" node will be labeled with the SSA number of its "use" identity - // (we looked in a side table above for its "def" identity). Look up that value. - ValueNumPair oldLhsVNPair = lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclFld->GetSsaNum())->m_vnPair; - newLhsVNPair = vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, - lclFld->gtFieldSeq, - rhsVNPair, // Pre-value. - lvaGetActualType(lclFld->gtLclNum)); + // We should never have a null field sequence here. + assert(lclFld->gtFieldSeq != NULL); + if (lclFld->gtFieldSeq == FieldSeqStore::NotAField()) + { + // We don't know what field this represents. Assign a new VN to the whole variable + // (since we may be writing to an unknown portion of it.) + newLhsVNPair.SetBoth(vnStore->VNForExpr(lvaGetActualType(lclFld->gtLclNum))); + } + else + { + // We do know the field sequence. + // The "lclFld" node will be labeled with the SSA number of its "use" identity + // (we looked in a side table above for its "def" identity). Look up that value. 
+ ValueNumPair oldLhsVNPair = lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclFld->GetSsaNum())->m_vnPair; + newLhsVNPair = vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, + lclFld->gtFieldSeq, + rhsVNPair, // Pre-value. + lvaGetActualType(lclFld->gtLclNum)); + } } lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair; lhs->gtVNPair = newLhsVNPair; @@ -5174,218 +5566,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) } } // Other kinds of assignment: initblk and copyblk. - else if (oper == GT_INITBLK) + else if (tree->OperIsBlkOp()) { - GenTreeLclVarCommon* lclVarTree; - bool isEntire; - - if (tree->DefinesLocal(this, &lclVarTree, &isEntire)) - { - assert(lclVarTree->gtFlags & GTF_VAR_DEF); - // Should not have been recorded as updating the heap. - assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum)); - - unsigned lclNum = lclVarTree->GetLclNum(); - - // Ignore vars that we excluded from SSA (for example, because they're address-exposed). They don't have SSA names - // in which to store VN's on defs. We'll yield unique VN's when we read from them. - if (!fgExcludeFromSsa(lclNum)) - { - unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree); - - ValueNum initBlkVN = ValueNumStore::NoVN; - GenTreePtr initConst = tree->gtGetOp1()->gtGetOp2(); - if (isEntire && initConst->OperGet() == GT_CNS_INT) - { - unsigned initVal = 0xFF & (unsigned) initConst->AsIntConCommon()->IconValue(); - if (initVal == 0) - { - initBlkVN = vnStore->VNZeroForType(lclVarTree->TypeGet()); - } - } - ValueNum lclVarVN = (initBlkVN != ValueNumStore::NoVN) ? initBlkVN - : vnStore->VNForExpr(var_types(lvaTable[lclNum].lvType)); - - lvaTable[lclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair.SetBoth(lclVarVN); -#ifdef DEBUG - if (verbose) - { - printf("N%03u ", tree->gtSeqNum); - Compiler::printTreeID(tree); - printf(" "); - gtDispNodeName(tree); - printf(" V%02u/%d => ",lclNum, lclDefSsaNum); - vnPrint(lclVarVN, 1); - printf("\n"); - } -#endif // DEBUG - } - // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want to be - // able to give VN's to. - tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); - } - else - { - // For now, arbitrary side effect on Heap. - // TODO-CQ: Why not be complete, and get this case right? - fgMutateHeap(tree DEBUGARG("INITBLK - non local")); - } - } - else if (GenTree::OperIsCopyBlkOp(oper)) - { - // TODO-Cleanup: We should factor things so that we uniformly rely on "PtrTo" VN's, and - // the heap cases can be shared with assignments. - GenTreeLclVarCommon* lclVarTree = NULL; - bool isEntire = false; - // Note that we don't care about exceptions here, since we're only using the values - // to perform an assignment (which happens after any exceptions are raised...) - - if (tree->DefinesLocal(this, &lclVarTree, &isEntire)) - { - // Should not have been recorded as updating the heap. - assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum)); - - unsigned lhsLclNum = lclVarTree->GetLclNum(); - LclVarDsc* rhsVarDsc = &lvaTable[lhsLclNum]; - // If it's excluded from SSA, don't need to do anything. - if (!fgExcludeFromSsa(lhsLclNum)) - { - unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree); - - // For addr-of-local expressions, lib/cons shouldn't matter. - assert(tree->gtOp.gtOp1->gtOp.gtOp1->gtVNPair.BothEqual()); - ValueNum lhsAddrVN = tree->gtOp.gtOp1->gtOp.gtOp1->GetVN(VNK_Liberal); - - // Unpack the PtrToLoc value number of the address. 
- assert(vnStore->IsVNFunc(lhsAddrVN)); - VNFuncApp lhsAddrFuncApp; - vnStore->GetVNFunc(lhsAddrVN, &lhsAddrFuncApp); - assert(lhsAddrFuncApp.m_func == VNF_PtrToLoc); - assert(vnStore->IsVNConstant(lhsAddrFuncApp.m_args[0]) && vnStore->ConstantValue<unsigned>(lhsAddrFuncApp.m_args[0]) == lhsLclNum); - FieldSeqNode* lhsFldSeq = vnStore->FieldSeqVNToFieldSeq(lhsAddrFuncApp.m_args[1]); - - // Now we need to get the proper RHS. - GenTreePtr srcAddr = tree->gtOp.gtOp1->gtOp.gtOp2; - VNFuncApp srcAddrFuncApp; - GenTreeLclVarCommon* rhsLclVarTree = nullptr; - FieldSeqNode* rhsFldSeq = nullptr; - ValueNumPair rhsVNPair; -#ifdef DEBUG - bool isNewUniq = false; -#endif - if (srcAddr->IsLocalAddrExpr(this, &rhsLclVarTree, &rhsFldSeq)) - { - unsigned rhsLclNum = rhsLclVarTree->GetLclNum(); - LclVarDsc* rhsVarDsc = &lvaTable[rhsLclNum]; - if (fgExcludeFromSsa(rhsLclNum) || rhsFldSeq == FieldSeqStore::NotAField()) - { - rhsVNPair.SetBoth(vnStore->VNForExpr(lclVarTree->TypeGet())); -#ifdef DEBUG - isNewUniq = true; -#endif - } - else - { - rhsVNPair = lvaTable[rhsLclVarTree->GetLclNum()].GetPerSsaData(rhsLclVarTree->GetSsaNum())->m_vnPair; - var_types indType = rhsLclVarTree->TypeGet(); - - rhsVNPair = vnStore->VNPairApplySelectors(rhsVNPair, rhsFldSeq, indType); - } - } - else if (vnStore->GetVNFunc(vnStore->VNNormVal(srcAddr->gtVNPair.GetLiberal()), &srcAddrFuncApp)) - { - if (srcAddrFuncApp.m_func == VNF_PtrToStatic) - { - var_types indType = lclVarTree->TypeGet(); - ValueNum fieldSeqVN = srcAddrFuncApp.m_args[0]; - - FieldSeqNode* zeroOffsetFldSeq = nullptr; - if (GetZeroOffsetFieldMap()->Lookup(srcAddr, &zeroOffsetFldSeq)) - { - fieldSeqVN = vnStore->FieldSeqVNAppend(fieldSeqVN, vnStore->VNForFieldSeq(zeroOffsetFldSeq)); - } - - FieldSeqNode* fldSeqForStaticVar = vnStore->FieldSeqVNToFieldSeq(fieldSeqVN); - - if (fldSeqForStaticVar != FieldSeqStore::NotAField()) - { - // We model statics as indices into the heap variable. - ValueNum selectedStaticVar; - size_t structSize = 0; - selectedStaticVar = vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, &structSize); - selectedStaticVar = vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, indType, structSize); - - rhsVNPair.SetLiberal(selectedStaticVar); - rhsVNPair.SetConservative(vnStore->VNForExpr(indType)); - } - else - { - JITDUMP(" *** Missing field sequence info for COPYBLK\n"); - rhsVNPair.SetBoth(vnStore->VNForExpr(indType)); // a new unique value number - } - } - else if (srcAddrFuncApp.m_func == VNF_PtrToArrElem) - { - ValueNum elemLib = fgValueNumberArrIndexVal(nullptr, &srcAddrFuncApp, vnStore->VNForEmptyExcSet()); - rhsVNPair.SetLiberal(elemLib); - rhsVNPair.SetConservative(vnStore->VNForExpr(lclVarTree->TypeGet())); - } - else - { - rhsVNPair.SetBoth(vnStore->VNForExpr(lclVarTree->TypeGet())); -#ifdef DEBUG - isNewUniq = true; -#endif - } - } - else - { - rhsVNPair.SetBoth(vnStore->VNForExpr(lclVarTree->TypeGet())); -#ifdef DEBUG - isNewUniq = true; -#endif - } - - ValueNumPair newRhsVNPair; - if (lhsFldSeq != nullptr && isEntire) - { - // This can occur in for structs with one field, itself of a struct type. - // We won't promote these. - // TODO-Cleanup: decide what exactly to do about this. - // Always treat them as maps, making them use/def, or reconstitute the - // map view here? 
- newRhsVNPair.SetBoth(vnStore->VNForExpr(TYP_STRUCT)); - } - else - { - ValueNumPair oldLhsVNPair = lvaTable[lhsLclNum].GetPerSsaData(lclVarTree->GetSsaNum())->m_vnPair; - newRhsVNPair = vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lhsFldSeq, rhsVNPair, lclVarTree->TypeGet()); - } - lvaTable[lhsLclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = vnStore->VNPNormVal(newRhsVNPair); -#ifdef DEBUG - if (verbose) - { - printf("Tree "); - Compiler::printTreeID(tree); - printf(" assigned VN to local var V%02u/%d: ", - lhsLclNum, lclDefSsaNum); - if (isNewUniq) - printf("new uniq "); - vnpPrint(newRhsVNPair, 1); - printf("\n"); - } -#endif // DEBUG - } - } - else - { - // For now, arbitrary side effect on Heap. - // TODO-CQ: Why not be complete, and get this case right? - fgMutateHeap(tree DEBUGARG("COPYBLK - non local")); - } - // Copyblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want to be - // able to give VN's to. - tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); + fgValueNumberBlockAssignment(tree, evalAsgLhsInd); } else if (oper == GT_ADDR) { @@ -5419,11 +5602,13 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) } else if (arg->gtOper == GT_IND) { - // ADDR and IND cancel out...except that this GT_ADDR may add a new field sequence, - // if it's a zero-offset... + // Usually the ADDR and IND just cancel out... + // except when this GT_ADDR has a valid zero-offset field sequence + // FieldSeqNode* zeroOffsetFieldSeq = nullptr; - if (GetZeroOffsetFieldMap()->Lookup(tree, &zeroOffsetFieldSeq)) - { + if (GetZeroOffsetFieldMap()->Lookup(tree, &zeroOffsetFieldSeq) && + (zeroOffsetFieldSeq != FieldSeqStore::NotAField())) + { ValueNum addrExtended = vnStore->ExtendPtrVN(arg->gtOp.gtOp1, zeroOffsetFieldSeq); if (addrExtended != ValueNumStore::NoVN) { @@ -5431,18 +5616,21 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) } else { - // We lose the zero-offset field. Make sure we don't give this address a value. + // ExtendPtrVN returned a failure result + // So give this address a new unique value tree->gtVNPair.SetBoth(vnStore->VNForExpr(TYP_BYREF)); } } else { - // They just cancel. + // They just cancel, so fetch the ValueNumber from the op1 of the GT_IND node. + // tree->gtVNPair = arg->gtOp.gtOp1->gtVNPair; + + // For the CSE phase mark the GT_IND child as GTF_DONT_CSE + // because it will end up with the same value number as tree (the GT_ADDR). + arg->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE; } - // For the CSE phase mark the GT_IND child as GTF_DONT_CSE - // it will end up with the same value number as tree (the GT_ADDR). - arg->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE; } else { @@ -5485,7 +5673,76 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) ValueNum newUniq = vnStore->VNForExpr(tree->TypeGet()); tree->gtVNPair = vnStore->VNPWithExc(ValueNumPair(newUniq, newUniq), addrXvnp); } - // Skip GT_IND nodes on that are the LHS of an assignment. (We labeled these earlier.) + // We always want to evaluate the LHS when the GT_IND node is marked with GTF_IND_ARR_INDEX + // as this will relabel the GT_IND child correctly using the VNF_PtrToArrElem + else if ((tree->gtFlags & GTF_IND_ARR_INDEX) != 0) + { + ArrayInfo arrInfo; + bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo); + assert(b); + + ValueNum inxVN = ValueNumStore::NoVN; + FieldSeqNode* fldSeq = nullptr; + + // GenTreePtr addr = tree->gtOp.gtOp1; + ValueNum addrVN = addrNvnp.GetLiberal(); + + // Try to parse it. 
+ GenTreePtr arr = nullptr; + addr->ParseArrayAddress(this, &arrInfo, &arr, &inxVN, &fldSeq); + if (arr == nullptr) + { + tree->gtVNPair.SetBoth(vnStore->VNForExpr(tree->TypeGet())); + return; + } + assert(fldSeq != FieldSeqStore::NotAField()); + + // Otherwise... + // Need to form H[arrType][arr][ind][fldSeq] + // Get the array element type equivalence class rep. + CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); + ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL); + + // We take the "VNNormVal"s here, because if either has exceptional outcomes, they will be captured + // as part of the value of the composite "addr" operation... + ValueNum arrVN = vnStore->VNNormVal(arr->gtVNPair.GetLiberal()); + inxVN = vnStore->VNNormVal(inxVN); + + // Additionally, relabel the address with a PtrToArrElem value number. + ValueNum fldSeqVN = vnStore->VNForFieldSeq(fldSeq); + ValueNum elemAddr = vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, elemTypeEqVN, arrVN, inxVN, fldSeqVN); + + // The aggregate "addr" VN should have had all the exceptions bubble up... + elemAddr = vnStore->VNWithExc(elemAddr, addrXvnp.GetLiberal()); + addr->gtVNPair.SetBoth(elemAddr); +#ifdef DEBUG + if (verbose) + { + printf(" Relabeled IND_ARR_INDEX address node "); + Compiler::printTreeID(addr); + printf(" with l:" STR_VN "%x: ", elemAddr); + vnStore->vnDump(this, elemAddr); + printf("\n"); + if (vnStore->VNNormVal(elemAddr) != elemAddr) + { + printf(" [" STR_VN "%x is: ", vnStore->VNNormVal(elemAddr)); + vnStore->vnDump(this, vnStore->VNNormVal(elemAddr)); + printf("]\n"); + } + } +#endif // DEBUG + // We now need to retrieve the value number for the array element value + // and give this value number to the GT_IND node 'tree' + // We do this whenever we have an rvalue, or for the LHS when we have an "op=", + // but we don't do it for a normal LHS assignment into an array element. + // + if (evalAsgLhsInd || ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)) + { + fgValueNumberArrIndexVal(tree, elemTypeEq, arrVN, inxVN, addrXvnp.GetLiberal(), fldSeq); + } + } + + // In general we skip GT_IND nodes on that are the LHS of an assignment. (We labeled these earlier.) // We will "evaluate" this as part of the assignment. (Unless we're explicitly told by // the caller to evaluate anyway -- perhaps the assignment is an "op=" assignment.) else if (((tree->gtFlags & GTF_IND_ASG_LHS) == 0) || evalAsgLhsInd) @@ -5542,69 +5799,6 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) { fgValueNumberArrIndexVal(tree, &funcApp, addrXvnp.GetLiberal()); } - else if (tree->gtFlags & GTF_IND_ARR_INDEX) - { - ArrayInfo arrInfo; - bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo); - assert(b); - - ValueNum inxVN = ValueNumStore::NoVN; - FieldSeqNode* fldSeq = nullptr; - - // GenTreePtr addr = tree->gtOp.gtOp1; - ValueNum addrVN = addrNvnp.GetLiberal(); - - // Try to parse it. - GenTreePtr arr = nullptr; - addr->ParseArrayAddress(this, &arrInfo, &arr, &inxVN, &fldSeq); - if (arr == nullptr) - { - tree->gtVNPair.SetBoth(vnStore->VNForExpr(tree->TypeGet())); - return; - } - assert(fldSeq != FieldSeqStore::NotAField()); - - // Otherwise... - // Need to form H[arrType][arr][ind][fldSeq] - // Get the array element type equivalence class rep. 
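Editor's note: the comment above describes array loads as selections through H[arrType][arr][ind][fldSeq]. A toy model of that nested-map view, using std::map purely as a stand-in for the value-number maps built by VNF_PtrToArrElem and map selection:

```cpp
#include <cstdio>
#include <map>
#include <string>

// Toy heap model: one selector per map level. Real value numbers are
// integers produced by VNForFunc; the strings and ints here are stand-ins.
using FieldMap = std::map<std::string, int>;     // field sequence -> value
using ElemMap  = std::map<int, FieldMap>;        // index          -> fields
using ArrMap   = std::map<int, ElemMap>;         // array object   -> elements
using Heap     = std::map<std::string, ArrMap>;  // element type   -> arrays

int main()
{
    Heap H;

    // Model a store to a[3].x, where 'a' is array object #7 of element type "S".
    H["S"][7][3]["x"] = 42;

    // A later load of a[3].x selects through the same chain and sees the
    // stored value, which is what lets the two agree on a value number.
    std::printf("%d\n", H["S"][7][3]["x"]); // prints: 42
    return 0;
}
```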
- CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); - ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL); - - // We take the "VNNormVal"s here, because if either has exceptional outcomes, they will be captured - // as part of the value of the composite "addr" operation... - ValueNum arrVN = vnStore->VNNormVal(arr->gtVNPair.GetLiberal()); - inxVN = vnStore->VNNormVal(inxVN); - - // Additionally, relabel the address with a PtrToArrElem value number. - ValueNum fldSeqVN = vnStore->VNForFieldSeq(fldSeq); - ValueNum elemAddr = - vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, elemTypeEqVN, arrVN, inxVN, fldSeqVN); - - // The aggregate "addr" VN should have had all the exceptions bubble up... - elemAddr = vnStore->VNWithExc(elemAddr, addrXvnp.GetLiberal()); - - addr->gtVNPair.SetLiberal(elemAddr); - // We never care about a heap address in conservative mode; no use could properly depend on it. - addr->gtVNPair.SetConservative(vnStore->VNForExpr(TYP_BYREF)); -#ifdef DEBUG - if (verbose) - { - printf(" Relabeled IND_ARR_INDEX address node "); - Compiler::printTreeID(addr); - printf(" with l:" STR_VN "%x: ", elemAddr); - vnStore->vnDump(this, elemAddr); - printf("\n"); - if (vnStore->VNNormVal(elemAddr) != elemAddr) - { - printf(" [" STR_VN "%x is: ", vnStore->VNNormVal(elemAddr)); - vnStore->vnDump(this, vnStore->VNNormVal(elemAddr)); - printf("]\n"); - } - } -#endif // DEBUG - - fgValueNumberArrIndexVal(tree, elemTypeEq, arrVN, inxVN, addrXvnp.GetLiberal(), fldSeq); - } else if (addr->IsFieldAddr(this, &obj, &staticOffset, &fldSeq2)) { if (fldSeq2 == FieldSeqStore::NotAField()) @@ -5682,6 +5876,10 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) { fgValueNumberCastTree(tree); } + else if (tree->OperGet() == GT_INTRINSIC) + { + fgValueNumberIntrinsic(tree); + } else if (ValueNumStore::VNFuncIsLegal(GetVNFuncForOper(oper, (tree->gtFlags & GTF_UNSIGNED) != 0))) { if (GenTree::OperIsUnary(oper)) @@ -5711,15 +5909,6 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) } } } - else if (oper == GT_MATH) // GT_MATH is a currently a subtype of binary operators, even though all - // the functions supported take only a single argument. Future-proofing, I guess. - { - GenTreeMath* mth = tree->AsMath(); - // All take a single double arg. - ValueNumPair arg0VNP; ValueNumPair arg0VNPx = ValueNumStore::VNPForEmptyExcSet(); - vnStore->VNPUnpackExc(mth->gtOp.gtOp1->gtVNPair, &arg0VNP, &arg0VNPx); - mth->gtVNPair = vnStore->VNPWithExc(vnStore->EvalMathFunc(tree->TypeGet(), mth->gtMathFN, arg0VNP), arg0VNPx); - } else { assert(!GenTree::OperIsAssignment(oper)); // We handled assignments earlier. @@ -5847,7 +6036,6 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) case GT_LOCKADD: // Binop case GT_XADD: // Binop case GT_XCHG: // Binop - case GT_CMPXCHG: // Specialop // For CMPXCHG and other intrinsics add an arbitrary side effect on Heap. fgMutateHeap(tree DEBUGARG("Interlocked intrinsic")); tree->gtVNPair.SetBoth(vnStore->VNForExpr(tree->TypeGet())); @@ -5894,6 +6082,12 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) } break; + case GT_CMPXCHG: // Specialop + // For CMPXCHG and other intrinsics add an arbitrary side effect on Heap. 
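Editor's note: interlocked operations in this switch are handled by fgMutateHeap, which installs a fresh heap value number so that nothing read from the heap before the operation is assumed to still hold afterwards. A toy model of that effect, with made-up names and a trivial read summary:

```cpp
#include <cstdio>

// Toy model of "arbitrary side effect on Heap": the heap state is one
// value number; a mutation replaces it, so reads keyed on the old heap
// VN can never be merged with reads performed after the mutation.
struct HeapModel
{
    int curHeapVN = 1;
    int nextVN    = 2;

    // A heap read is summarized from (current heap VN, address VN);
    // the arithmetic is a stand-in for real map selection.
    int readVN(int addrVN) const { return curHeapVN * 1000 + addrVN; }

    // CMPXCHG, XADD, XCHG, ... : give the heap a brand new VN.
    void mutateHeap() { curHeapVN = nextVN++; }
};

int main()
{
    HeapModel h;
    int before = h.readVN(/*addrVN*/ 7);
    h.mutateHeap();                        // e.g. a CMPXCHG on a shared field
    int after  = h.readVN(/*addrVN*/ 7);
    std::printf("%d %d equal=%d\n", before, after, before == after); // equal=0
    return 0;
}
```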
+ fgMutateHeap(tree DEBUGARG("Interlocked intrinsic")); + tree->gtVNPair.SetBoth(vnStore->VNForExpr(tree->TypeGet())); + break; + default: tree->gtVNPair.SetBoth(vnStore->VNForExpr(tree->TypeGet())); } @@ -5919,6 +6113,72 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) #endif // DEBUG } +void Compiler::fgValueNumberIntrinsic(GenTreePtr tree) +{ + assert(tree->OperGet() == GT_INTRINSIC); + GenTreeIntrinsic* intrinsic = tree->AsIntrinsic(); + ValueNumPair arg0VNP, arg1VNP; + ValueNumPair arg0VNPx = ValueNumStore::VNPForEmptyExcSet(); + ValueNumPair arg1VNPx = ValueNumStore::VNPForEmptyExcSet(); + + vnStore->VNPUnpackExc(intrinsic->gtOp.gtOp1->gtVNPair, &arg0VNP, &arg0VNPx); + + if (intrinsic->gtOp.gtOp2 != nullptr) + { + vnStore->VNPUnpackExc(intrinsic->gtOp.gtOp2->gtVNPair, &arg1VNP, &arg1VNPx); + } + + switch (intrinsic->gtIntrinsicId) + { + case CORINFO_INTRINSIC_Sin: + case CORINFO_INTRINSIC_Sqrt: + case CORINFO_INTRINSIC_Abs: + case CORINFO_INTRINSIC_Cos: + case CORINFO_INTRINSIC_Round: + case CORINFO_INTRINSIC_Cosh: + case CORINFO_INTRINSIC_Sinh: + case CORINFO_INTRINSIC_Tan: + case CORINFO_INTRINSIC_Tanh: + case CORINFO_INTRINSIC_Asin: + case CORINFO_INTRINSIC_Acos: + case CORINFO_INTRINSIC_Atan: + case CORINFO_INTRINSIC_Atan2: + case CORINFO_INTRINSIC_Log10: + case CORINFO_INTRINSIC_Pow: + case CORINFO_INTRINSIC_Exp: + case CORINFO_INTRINSIC_Ceiling: + case CORINFO_INTRINSIC_Floor: + + // GT_INTRINSIC is a currently a subtype of binary operators. But most of + // the math intrinsics are actually unary operations. + + if (intrinsic->gtOp.gtOp2 == nullptr) + { + intrinsic->gtVNPair = vnStore->VNPWithExc( + vnStore->EvalMathFuncUnary(tree->TypeGet(), + intrinsic->gtIntrinsicId, + arg0VNP), + arg0VNPx); + } + else + { + ValueNumPair newVNP = vnStore->EvalMathFuncBinary(tree->TypeGet(), intrinsic->gtIntrinsicId, arg0VNP, arg1VNP); + ValueNumPair excSet = vnStore->VNPExcSetUnion(arg0VNPx, arg1VNPx); + intrinsic->gtVNPair = vnStore->VNPWithExc(newVNP, excSet); + } + + break; + + case CORINFO_INTRINSIC_Object_GetType: + intrinsic->gtVNPair = vnStore->VNPWithExc( + vnStore->VNPairForFunc(intrinsic->TypeGet(), VNF_ObjGetType, arg0VNP), arg0VNPx); + break; + + default: + unreached(); + } +} + void Compiler::fgValueNumberCastTree(GenTreePtr tree) { assert(tree->OperGet() == GT_CAST); diff --git a/src/jit/valuenum.h b/src/jit/valuenum.h index 4056bd0f54..6b32332351 100644 --- a/src/jit/valuenum.h +++ b/src/jit/valuenum.h @@ -515,6 +515,9 @@ public: // Returns true iff the VN represents a (non-handle) constant. bool IsVNConstant(ValueNum vn); + // Returns true iff the VN represents an integeral constant. + bool IsVNInt32Constant(ValueNum vn); + struct ArrLenArithBoundInfo { // (vnArr.len - 1) > vnOp @@ -549,6 +552,32 @@ public: #endif }; + struct ConstantBoundInfo + { + // 100 > vnOp + int constVal; + unsigned cmpOper; + ValueNum cmpOpVN; + + ConstantBoundInfo() + : constVal(0) + , cmpOper(GT_NONE) + , cmpOpVN(NoVN) + { + } + +#ifdef DEBUG + void dump(ValueNumStore* vnStore) + { + vnStore->vnDump(vnStore->m_pComp, cmpOpVN); + printf(" "); + printf(vnStore->VNFuncName((VNFunc)cmpOper)); + printf(" "); + printf("%d", constVal); + } +#endif + }; + // Check if "vn" is "new [] (type handle, size)" bool IsVNNewArr(ValueNum vn); @@ -561,6 +590,12 @@ public: // If "vn" is VN(a.len) then return VN(a); NoVN if VN(a) can't be determined. ValueNum GetArrForLenVn(ValueNum vn); + // Return true with any Relop except for == and != and one operand has to be a 32-bit integer constant. 
+ bool IsVNConstantBound(ValueNum vn); + + // If "vn" is constant bound, then populate the "info" fields for constVal, cmpOp, cmpOper. + void GetConstantBoundInfo(ValueNum vn, ConstantBoundInfo* info); + // If "vn" is of the form "var < a.len" or "a.len <= var" return true. bool IsVNArrLenBound(ValueNum vn); @@ -685,12 +720,24 @@ public: bool IsHandle(ValueNum vn); // Requires "mthFunc" to be an intrinsic math function (one of the allowable values for the "gtMath" field - // of a GenTreeMath node). Return the value number for the application of this function to "arg0VN". - ValueNum EvalMathFunc(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN); - ValueNumPair EvalMathFunc(var_types typ, CorInfoIntrinsics mthFunc, ValueNumPair arg0VNP) + // of a GenTreeMath node). For unary ops, return the value number for the application of this function to + // "arg0VN". For binary ops, return the value number for the application of this function to "arg0VN" and + // "arg1VN". + + ValueNum EvalMathFuncUnary(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN); + + ValueNum EvalMathFuncBinary(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN, ValueNum arg1VN); + + ValueNumPair EvalMathFuncUnary(var_types typ, CorInfoIntrinsics mthFunc, ValueNumPair arg0VNP) + { + return ValueNumPair(EvalMathFuncUnary(typ, mthFunc, arg0VNP.GetLiberal()), + EvalMathFuncUnary(typ, mthFunc, arg0VNP.GetConservative())); + } + + ValueNumPair EvalMathFuncBinary(var_types typ, CorInfoIntrinsics mthFunc, ValueNumPair arg0VNP, ValueNumPair arg1VNP) { - return ValueNumPair(EvalMathFunc(typ, mthFunc, arg0VNP.GetLiberal()), - EvalMathFunc(typ, mthFunc, arg0VNP.GetConservative())); + return ValueNumPair(EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetLiberal(), arg1VNP.GetLiberal()), + EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetConservative(), arg1VNP.GetConservative())); } // Returns "true" iff "vn" represents a function application. 
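Editor's note: the ValueNumPair overloads above lift the scalar evaluators by applying them to the liberal and conservative halves independently. A generic sketch of that pair-lifting pattern with a stand-in pair type:

```cpp
#include <cstdio>

// Stand-in for ValueNumPair: one liberal and one conservative value number.
struct VNPair
{
    int liberal;
    int conservative;
};

// Pair-level evaluation is just the scalar evaluator applied to each half,
// mirroring how the ValueNumPair form of EvalMathFuncUnary is defined above.
template <typename ScalarEval>
static VNPair liftUnary(ScalarEval eval, VNPair arg)
{
    return { eval(arg.liberal), eval(arg.conservative) };
}

int main()
{
    VNPair arg = { 10, 20 };
    // Toy scalar "evaluator" standing in for evaluating one math intrinsic VN.
    VNPair res = liftUnary([](int vn) { return vn + 1000; }, arg);
    std::printf("%d %d\n", res.liberal, res.conservative); // prints: 1010 1020
    return 0;
}
```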
diff --git a/src/jit/valuenumfuncs.h b/src/jit/valuenumfuncs.h index aa352fe2b1..59111242ce 100644 --- a/src/jit/valuenumfuncs.h +++ b/src/jit/valuenumfuncs.h @@ -74,10 +74,23 @@ ValueNumFuncDef(Abs, 1, false, false, false) ValueNumFuncDef(RoundDouble, 1, false, false, false) ValueNumFuncDef(RoundFloat, 1, false, false, false) ValueNumFuncDef(RoundInt, 1, false, false, false) - +ValueNumFuncDef(Cosh, 1, false, false, false) +ValueNumFuncDef(Sinh, 1, false, false, false) +ValueNumFuncDef(Tan, 1, false, false, false) +ValueNumFuncDef(Tanh, 1, false, false, false) +ValueNumFuncDef(Asin, 1, false, false, false) +ValueNumFuncDef(Acos, 1, false, false, false) +ValueNumFuncDef(Atan, 1, false, false, false) +ValueNumFuncDef(Atan2, 2, false, false, false) +ValueNumFuncDef(Log10, 1, false, false, false) +ValueNumFuncDef(Pow, 2, false, false, false) +ValueNumFuncDef(Exp, 1, false, false, false) +ValueNumFuncDef(Ceiling, 1, false, false, false) +ValueNumFuncDef(Floor, 1, false, false, false) ValueNumFuncDef(ManagedThreadId, 0, false, false, false) +ValueNumFuncDef(ObjGetType, 1, false, false, false) ValueNumFuncDef(GetgenericsGcstaticBase, 1, false, true, true) ValueNumFuncDef(GetgenericsNongcstaticBase, 1, false, true, true) ValueNumFuncDef(GetsharedGcstaticBase, 2, false, true, true) diff --git a/src/jit/varset.h b/src/jit/varset.h index be581c9596..1aa30fcc2e 100644 --- a/src/jit/varset.h +++ b/src/jit/varset.h @@ -70,8 +70,18 @@ typedef BitSetOps</*BitSetType*/BitSetShortLongRep, typedef BitSetShortLongRep VARSET_TP; -// For the unbounded-width varset implementation, test it a little by allowing the max tracked vars to be than UINT64 can hold. +// Tested various sizes for max tracked locals. The largest value for which no throughput regression +// could be measured was 512. Going to 1024 showed the first throughput regressions. +// We anticipate the larger size will be needed to support better inlining. +// There were a number of failures when 512 was used for legacy, so we just retain the 128 value +// for legacy backend. + +#if !defined(LEGACY_BACKEND) +const unsigned lclMAX_TRACKED = 512; +#else const unsigned lclMAX_TRACKED = 128; +#endif + #define VARSET_REP_IS_CLASS 0 @@ -138,8 +148,7 @@ typedef BitSetOps</*BitSetType*/BitSetShortLongRep, typedef BitSetShortLongRep ALLVARSET_TP; -// For the unbounded-width varset implementation, test them a little by allowing the max tracked vars to be than UINT64 can hold. 
-const unsigned lclMAX_ALLSET_TRACKED = 128; +const unsigned lclMAX_ALLSET_TRACKED = lclMAX_TRACKED; #define ALLVARSET_REP_IS_CLASS 0 diff --git a/src/jit/vartype.h b/src/jit/vartype.h index ed7fae2dcb..4dec366cd2 100644 --- a/src/jit/vartype.h +++ b/src/jit/vartype.h @@ -18,6 +18,7 @@ enum var_types_classification VTF_GCR = 0x0008, // type is GC ref VTF_BYR = 0x0010, // type is Byref VTF_I = 0x0020, // is machine sized + VTF_S = 0x0040, // is a struct type }; DECLARE_TYPED_ENUM(var_types,BYTE) @@ -73,23 +74,18 @@ inline var_types TypeGet(var_types v) { return v; } template <class T> inline bool varTypeIsSIMD(T vt) { - if (TypeGet(vt) == TYP_SIMD12) + switch(TypeGet(vt)) { - return true; - } - - if (TypeGet(vt) == TYP_SIMD16) - { - return true; - } - + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: #ifdef FEATURE_AVX_SUPPORT - if (TypeGet(vt) == TYP_SIMD32) - { + case TYP_SIMD32: +#endif // FEATURE_AVX_SUPPORT return true; + default: + return false; } -#endif // FEATURE_AVX_SUPPORT - return false; } #else // FEATURE_SIMD @@ -257,12 +253,24 @@ inline bool varTypeIsComposite(T vt) template <class T> inline bool varTypeIsPromotable(T vt) { - return (TypeGet(vt) == TYP_STRUCT || - (TypeGet(vt) == TYP_BLK) || + return (varTypeIsStruct(vt) + || (TypeGet(vt) == TYP_BLK) #if !defined(_TARGET_64BIT_) - varTypeIsLong(vt) || + || varTypeIsLong(vt) #endif // !defined(_TARGET_64BIT_) - varTypeIsSIMD(vt)); + ); +} + +template <class T> +inline bool varTypeIsStruct(T vt) +{ + return ((varTypeClassification[TypeGet(vt)] & VTF_S) != 0); +} + +template <class T> +inline bool varTypeIsEnregisterableStruct(T vt) +{ + return (TypeGet(vt) != TYP_STRUCT); } /*****************************************************************************/ diff --git a/src/mscorlib/GenerateCompilerResponseFile.targets b/src/mscorlib/GenerateCompilerResponseFile.targets index 21af5355bf..81999bcbe1 100644 --- a/src/mscorlib/GenerateCompilerResponseFile.targets +++ b/src/mscorlib/GenerateCompilerResponseFile.targets @@ -8,13 +8,13 @@ <Message Text="Generating module name response file: $(IntermediateOutputPath)\moduleName.rsp" /> <!-- When Roslyn is NOT enabled, we need to set the module name for mscorlib --> - <WriteLinesToFile Condition="'$(UseRoslynCompiler)'!='true'" + <WriteLinesToFile Condition="'$(TargetManagedCompiler)'!='Roslyn'" File="$(IntermediateOutputPath)\moduleName.rsp" Lines="/test:moduleName=CommonLanguageRuntimeLibrary" Overwrite="true" /> <!-- When Roslyn is enabled we need to set the runtimemetadataversion --> - <WriteLinesToFile Condition="'$(UseRoslynCompiler)'=='true'" + <WriteLinesToFile Condition="'$(TargetManagedCompiler)'=='Roslyn'" File="$(IntermediateOutputPath)\moduleName.rsp" Lines="/runtimemetadataversion:v4.0.30319" Overwrite="true" /> diff --git a/src/mscorlib/src/System/Security/Cryptography/CryptoConfig.cs b/src/mscorlib/src/System/Security/Cryptography/CryptoConfig.cs index 6a7ac090be..22106bf597 100644 --- a/src/mscorlib/src/System/Security/Cryptography/CryptoConfig.cs +++ b/src/mscorlib/src/System/Security/Cryptography/CryptoConfig.cs @@ -163,13 +163,6 @@ namespace System.Security.Cryptography { #if FEATURE_CRYPTO Type SHA1CryptoServiceProviderType = typeof(System.Security.Cryptography.SHA1CryptoServiceProvider); Type MD5CryptoServiceProviderType = typeof(System.Security.Cryptography.MD5CryptoServiceProvider); -#endif //FEATURE_CRYPTO -#if FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO - Type SHA256ManagedType = typeof(SHA256Managed); -#endif //FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO 
-#if FEATURE_CRYPTO - Type SHA384ManagedType = typeof(SHA384Managed); - Type SHA512ManagedType = typeof(SHA512Managed); Type RIPEMD160ManagedType = typeof(System.Security.Cryptography.RIPEMD160Managed); Type HMACMD5Type = typeof(System.Security.Cryptography.HMACMD5); Type HMACRIPEMD160Type = typeof(System.Security.Cryptography.HMACRIPEMD160); @@ -224,11 +217,26 @@ namespace System.Security.Cryptography { string SHA384CryptoSerivceProviderType = "System.Security.Cryptography.SHA384CryptoServiceProvider, " + AssemblyRef.SystemCore; string SHA512CngType = "System.Security.Cryptography.SHA512Cng, " + AssemblyRef.SystemCore; string SHA512CryptoServiceProviderType = "System.Security.Cryptography.SHA512CryptoServiceProvider, " + AssemblyRef.SystemCore; +#endif //FEATURE_CRYPTO + + +#if FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO + bool fipsOnly = AllowOnlyFipsAlgorithms; + object SHA256DefaultType = typeof(SHA256Managed); +#endif //FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO + +#if FEATURE_CRYPTO + if (fipsOnly) + { + SHA256DefaultType = SHA256CngType; + } + object SHA384DefaultType = fipsOnly ? (object)SHA384CngType : (object)typeof(SHA384Managed); + object SHA512DefaultType = fipsOnly ? (object)SHA512CngType : (object)typeof(SHA512Managed); // Cryptography algorithms in System.Security string DpapiDataProtectorType = "System.Security.Cryptography.DpapiDataProtector, " + AssemblyRef.SystemSecurity; - #endif //FEATURE_CRYPTO + #if FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO // Random number generator ht.Add("RandomNumberGenerator", RNGCryptoServiceProviderType); @@ -247,21 +255,21 @@ namespace System.Security.Cryptography { ht.Add("System.Security.Cryptography.MD5Cng", MD5CngType); #endif //FEATURE_CRYPTO #if FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO - ht.Add("SHA256", SHA256ManagedType); - ht.Add("SHA-256", SHA256ManagedType); - ht.Add("System.Security.Cryptography.SHA256", SHA256ManagedType); + ht.Add("SHA256", SHA256DefaultType); + ht.Add("SHA-256", SHA256DefaultType); + ht.Add("System.Security.Cryptography.SHA256", SHA256DefaultType); #endif //FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO #if FEATURE_CRYPTO ht.Add("System.Security.Cryptography.SHA256Cng", SHA256CngType); ht.Add("System.Security.Cryptography.SHA256CryptoServiceProvider", SHA256CryptoServiceProviderType); - ht.Add("SHA384", SHA384ManagedType); - ht.Add("SHA-384", SHA384ManagedType); - ht.Add("System.Security.Cryptography.SHA384", SHA384ManagedType); + ht.Add("SHA384", SHA384DefaultType); + ht.Add("SHA-384", SHA384DefaultType); + ht.Add("System.Security.Cryptography.SHA384", SHA384DefaultType); ht.Add("System.Security.Cryptography.SHA384Cng", SHA384CngType); ht.Add("System.Security.Cryptography.SHA384CryptoServiceProvider", SHA384CryptoSerivceProviderType); - ht.Add("SHA512", SHA512ManagedType); - ht.Add("SHA-512", SHA512ManagedType); - ht.Add("System.Security.Cryptography.SHA512", SHA512ManagedType); + ht.Add("SHA512", SHA512DefaultType); + ht.Add("SHA-512", SHA512DefaultType); + ht.Add("System.Security.Cryptography.SHA512", SHA512DefaultType); ht.Add("System.Security.Cryptography.SHA512Cng", SHA512CngType); ht.Add("System.Security.Cryptography.SHA512CryptoServiceProvider", SHA512CryptoServiceProviderType); ht.Add("RIPEMD160", RIPEMD160ManagedType); @@ -354,10 +362,10 @@ namespace System.Security.Cryptography { // Add the other hash algorithms introduced with XML Encryption #endif //FEATURE_CRYPTO #if FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO - ht.Add("http://www.w3.org/2001/04/xmlenc#sha256", SHA256ManagedType); + 
ht.Add("http://www.w3.org/2001/04/xmlenc#sha256", SHA256DefaultType); #endif //FEATURE_CRYPTO || FEATURE_LEGACYNETCFCRYPTO #if FEATURE_CRYPTO && !FEATURE_CORECLR - ht.Add("http://www.w3.org/2001/04/xmlenc#sha512", SHA512ManagedType); + ht.Add("http://www.w3.org/2001/04/xmlenc#sha512", SHA512DefaultType); ht.Add("http://www.w3.org/2001/04/xmlenc#ripemd160", RIPEMD160ManagedType); // Xml Encryption symmetric keys @@ -412,7 +420,7 @@ namespace System.Security.Cryptography { // Xml Dsig-more Uri's as defined in http://www.ietf.org/rfc/rfc4051.txt ht.Add("http://www.w3.org/2001/04/xmldsig-more#md5", MD5CryptoServiceProviderType); - ht.Add("http://www.w3.org/2001/04/xmldsig-more#sha384", SHA384ManagedType); + ht.Add("http://www.w3.org/2001/04/xmldsig-more#sha384", SHA384DefaultType); ht.Add("http://www.w3.org/2001/04/xmldsig-more#hmac-md5", HMACMD5Type); ht.Add("http://www.w3.org/2001/04/xmldsig-more#hmac-ripemd160", HMACRIPEMD160Type); #endif //FEATURE_CRYPTO diff --git a/src/mscorlib/src/System/Security/Cryptography/RSAPKCS1SignatureDeformatter.cs b/src/mscorlib/src/System/Security/Cryptography/RSAPKCS1SignatureDeformatter.cs index 8231644f47..1504a0d996 100644 --- a/src/mscorlib/src/System/Security/Cryptography/RSAPKCS1SignatureDeformatter.cs +++ b/src/mscorlib/src/System/Security/Cryptography/RSAPKCS1SignatureDeformatter.cs @@ -61,14 +61,21 @@ namespace System.Security.Cryptography { // Two cases here -- if we are talking to the CSP version or if we are talking to some other RSA provider. if (_rsaKey is RSACryptoServiceProvider) { + // This path is kept around for desktop compat: in case someone is using this with a hash algorithm that's known to GetAlgIdFromOid but + // not from OidToHashAlgorithmName. int calgHash = X509Utils.GetAlgIdFromOid(_strOID, OidGroup.HashAlgorithm); return ((RSACryptoServiceProvider)_rsaKey).VerifyHash(rgbHash, calgHash, rgbSignature); } else { +#if FEATURE_CORECLR byte[] pad = Utils.RsaPkcs1Padding(_rsaKey, CryptoConfig.EncodeOID(_strOID), rgbHash); // Apply the public key to the signature data to get back the padded buffer actually signed. // Compare the two buffers to see if they match; ignoring any leading zeros return Utils.CompareBigIntArrays(_rsaKey.EncryptValue(rgbSignature), pad); +#else + HashAlgorithmName hashAlgorithmName = Utils.OidToHashAlgorithmName(_strOID); + return _rsaKey.VerifyHash(rgbHash, rgbSignature, hashAlgorithmName, RSASignaturePadding.Pkcs1); +#endif } } } diff --git a/src/mscorlib/src/System/Security/Cryptography/RSAPKCS1SignatureFormatter.cs b/src/mscorlib/src/System/Security/Cryptography/RSAPKCS1SignatureFormatter.cs index dfcab8d2e2..bc1787686d 100644 --- a/src/mscorlib/src/System/Security/Cryptography/RSAPKCS1SignatureFormatter.cs +++ b/src/mscorlib/src/System/Security/Cryptography/RSAPKCS1SignatureFormatter.cs @@ -52,13 +52,20 @@ namespace System.Security.Cryptography { // Two cases here -- if we are talking to the CSP version or if we are talking to some other RSA provider. if (_rsaKey is RSACryptoServiceProvider) { + // This path is kept around for desktop compat: in case someone is using this with a hash algorithm that's known to GetAlgIdFromOid but + // not from OidToHashAlgorithmName. 
int calgHash = X509Utils.GetAlgIdFromOid(_strOID, OidGroup.HashAlgorithm); return ((RSACryptoServiceProvider)_rsaKey).SignHash(rgbHash, calgHash); } else { +#if FEATURE_CORECLR byte[] pad = Utils.RsaPkcs1Padding(_rsaKey, CryptoConfig.EncodeOID(_strOID), rgbHash); // Create the signature by applying the private key to the padded buffer we just created. return _rsaKey.DecryptValue(pad); +#else + HashAlgorithmName hashAlgorithmName = Utils.OidToHashAlgorithmName(_strOID); + return _rsaKey.SignHash(rgbHash, hashAlgorithmName, RSASignaturePadding.Pkcs1); +#endif } } } diff --git a/src/mscorlib/src/System/Security/Cryptography/Utils.cs b/src/mscorlib/src/System/Security/Cryptography/Utils.cs index 8b16e18126..26f7262558 100644 --- a/src/mscorlib/src/System/Security/Cryptography/Utils.cs +++ b/src/mscorlib/src/System/Security/Cryptography/Utils.cs @@ -962,6 +962,29 @@ namespace System.Security.Cryptography return true; } +#if !FEATURE_CORECLR + internal static HashAlgorithmName OidToHashAlgorithmName(string oid) + { + switch (oid) + { + case Constants.OID_OIWSEC_SHA1: + return HashAlgorithmName.SHA1; + + case Constants.OID_OIWSEC_SHA256: + return HashAlgorithmName.SHA256; + + case Constants.OID_OIWSEC_SHA384: + return HashAlgorithmName.SHA384; + + case Constants.OID_OIWSEC_SHA512: + return HashAlgorithmName.SHA512; + + default: + throw new NotSupportedException(); + } + } +#endif // FEATURE_CORECLR + [System.Security.SecurityCritical] // auto-generated [DllImport(JitHelpers.QCall, CharSet = CharSet.Unicode), SuppressUnmanagedCodeSecurity] internal static extern SafeHashHandle CreateHash(SafeProvHandle hProv, int algid); diff --git a/src/tools/crossgen/crossgen.nativeproj b/src/tools/crossgen/crossgen.nativeproj index 5377194c76..ed49920006 100644 --- a/src/tools/crossgen/crossgen.nativeproj +++ b/src/tools/crossgen/crossgen.nativeproj @@ -41,13 +41,8 @@ <ProjectReference>$(ClrSrcDirectory)zap\crossgen\zap_crossgen.nativeproj</ProjectReference> </TargetLib> - <!-- In the CodeGen branch, we use RyuJIT for all JIT builds --> - <TargetLib Condition="'$(MDILGenerator)' != 'true' and '$(_BuildBranch)' == 'CodeGen'" Include="$(ClrLibPath)\jit_crossgen.lib"> - <ProjectReference>$(ClrSrcDirectory)jit\crossgen\jit_crossgen.nativeproj</ProjectReference> - </TargetLib> - - <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 --> - <TargetLib Condition="'$(MDILGenerator)' != 'true' and '$(_BuildBranch)' != 'CodeGen'" Include="$(ClrLibPath)\jit_crossgen.lib"> + <!-- We build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 --> + <TargetLib Condition="'$(MDILGenerator)' != 'true'" Include="$(ClrLibPath)\jit_crossgen.lib"> <ProjectReference Condition="'$(_BuildArch)' == 'amd64' or '$(_BuildArch)' == 'arm64'">$(ClrSrcDirectory)jit\crossgen\jit_crossgen.nativeproj</ProjectReference> <ProjectReference Condition="'$(_BuildArch)' != 'amd64' and '$(_BuildArch)' != 'arm64'">$(ClrSrcDirectory)jit32\crossgen\jit_crossgen.nativeproj</ProjectReference> </TargetLib> diff --git a/src/utilcode/debug.cpp b/src/utilcode/debug.cpp index 6b9de4f82e..43de454d70 100644 --- a/src/utilcode/debug.cpp +++ b/src/utilcode/debug.cpp @@ -490,6 +490,11 @@ bool _DbgBreakCheck( switch(ret) { + case 0: +#if 0 + // The message box was not displayed. Tell caller to break. + return true; +#endif // For abort, just quit the app. 
case IDABORT: TerminateProcess(GetCurrentProcess(), 1); @@ -538,10 +543,6 @@ bool _DbgBreakCheck( psData->iLine = iLine; strcpy(psData->rcFile, szFile); break; - - case 0: - // The message box was not displayed. Tell caller to break. - return true; } return false; diff --git a/src/vm/arm64/asmhelpers.asm b/src/vm/arm64/asmhelpers.asm index 9c9cc70d1c..b3cc54a2db 100644 --- a/src/vm/arm64/asmhelpers.asm +++ b/src/vm/arm64/asmhelpers.asm @@ -38,6 +38,7 @@ IMPORT g_highest_address IMPORT g_card_table IMPORT g_TrapReturningThreads + IMPORT g_dispatch_cache_chain_success_counter TEXTAREA @@ -133,8 +134,8 @@ #endif ; If machine state is invalid, then simply exit - ldr x1, [x0, #MachState__isValid] - cmp x1, #0 + ldr w1, [x0, #MachState__isValid] + cmp w1, #0 beq Done RestoreRegMS 19, X19 @@ -212,35 +213,6 @@ ThePreStubPatchLabel ret lr LEAF_END - -;; ------------------------------------------------------------------ -;; void ResolveWorkerAsmStub(args in regs x0-x7 & stack, x11:IndirectionCellAndFlags, x12:DispatchToken) -;; -;; The stub dispatch thunk which transfers control to VSD_ResolveWorker. - NESTED_ENTRY ResolveWorkerAsmStub - - PROLOG_WITH_TRANSITION_BLOCK - - add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock - and x1, x11, #-4 ; Indirection cell - mov x2, x12 ; DispatchToken - and x3, x11, #3 ; flag - bl VSD_ResolveWorker - mov x9, x0 - - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL - - EPILOG_BRANCH_REG x9 - - NESTED_END - - NESTED_ENTRY ResolveWorkerChainLookupAsmStub - - // ARMSTUB TODO: implement chained lookup - b ResolveWorkerAsmStub - - NESTED_END - ;----------------------------------------------------------------------------- ; The following Macros help in WRITE_BARRIER Implemetations ; WRITE_BARRIER_ENTRY @@ -1026,6 +998,95 @@ FaultingExceptionFrame_FrameOffset SETA SIZEOF__GSCookie GenerateRedirectedStubWithFrame NakedThrowHelper, NakedThrowHelper2 +; ------------------------------------------------------------------ +; ResolveWorkerChainLookupAsmStub +; +; This method will perform a quick chained lookup of the entry if the +; initial cache lookup fails. +; +; On Entry: +; x9 contains the pointer to the current ResolveCacheElem +; x11 contains the address of the indirection (and the flags in the low two bits) +; x12 contains our contract the DispatchToken +; Must be preserved: +; x0 contains the instance object ref that we are making an interface call on +; [x1-x7] contains any additional register arguments for the interface method +; +; Loaded from x0 +; x13 contains our type the MethodTable (from object ref in x0) +; +; On Exit: +; x0, [x1-x7] arguments for the interface implementation target +; +; On Exit (to ResolveWorkerAsmStub): +; x11 contains the address of the indirection and the flags in the low two bits. 
+; x12 contains our contract (DispatchToken) +; + GBLA BACKPATCH_FLAG ; two low bit flags used by ResolveWorkerAsmStub + GBLA PROMOTE_CHAIN_FLAG ; two low bit flags used by ResolveWorkerAsmStub +BACKPATCH_FLAG SETA 1 +PROMOTE_CHAIN_FLAG SETA 2 + + NESTED_ENTRY ResolveWorkerChainLookupAsmStub + + tst x11, #BACKPATCH_FLAG ; First we check if x11 has the BACKPATCH_FLAG set + bne Fail ; If the BACKPATCH_FLAGS is set we will go directly to the ResolveWorkerAsmStub + + ldr x13, [x0] ; retrieve the MethodTable from the object ref in x0 +MainLoop + ldr x9, [x9, #24] ; x9 <= the next entry in the chain + cmp x9, #0 + beq Fail + + ldr x9, [x9, #0] + cmp x9, x13 ; compare our MT with the one in the ResolveCacheElem + bne MainLoop + + ldr x9, [x9, #8] + cmp x8, x12 ; compare our DispatchToken with one in the ResolveCacheElem + bne MainLoop + +Success + ldr x13, =g_dispatch_cache_chain_success_counter + ldr x9, [x13] + subs x9, x9, #1 + str x9, [x13] + blt Promote + + ldr x16, [x9, #16] ; get the ImplTarget + br x16 ; branch to interface implemenation target + +Promote + ; Move this entry to head postion of the chain + mov x9, #256 + str x9, [x13] ; be quick to reset the counter so we don't get a bunch of contending threads + orr x11, x11, #PROMOTE_CHAIN_FLAG ; set PROMOTE_CHAIN_FLAG + +Fail + b ResolveWorkerAsmStub ; call the ResolveWorkerAsmStub method to transition into the VM + + NESTED_END ResolveWorkerChainLookupAsmStub + +;; ------------------------------------------------------------------ +;; void ResolveWorkerAsmStub(args in regs x0-x7 & stack, x11:IndirectionCellAndFlags, x12:DispatchToken) +;; +;; The stub dispatch thunk which transfers control to VSD_ResolveWorker. + NESTED_ENTRY ResolveWorkerAsmStub + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + and x1, x11, #-4 ; Indirection cell + mov x2, x12 ; DispatchToken + and x3, x11, #3 ; flag + bl VSD_ResolveWorker + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + + EPILOG_BRANCH_REG x9 + + NESTED_END ; Must be at very end of file END diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h index ac9921a23c..0b551ae5f8 100644 --- a/src/vm/arm64/cgencpu.h +++ b/src/vm/arm64/cgencpu.h @@ -161,14 +161,14 @@ inline PCODE GetLR(const T_CONTEXT * context) { extern "C" LPVOID __stdcall GetCurrentSP(); -inline void SetSP(T_CONTEXT *context, TADDR esp) { +inline void SetSP(T_CONTEXT *context, TADDR sp) { LIMITED_METHOD_DAC_CONTRACT; - context->Sp = DWORD(esp); + context->Sp = DWORD64(sp); } -inline void SetFP(T_CONTEXT *context, TADDR ebp) { +inline void SetFP(T_CONTEXT *context, TADDR fp) { LIMITED_METHOD_DAC_CONTRACT; - context->Fp = DWORD(ebp); + context->Fp = DWORD64(fp); } inline TADDR GetFP(const T_CONTEXT * context) @@ -181,6 +181,16 @@ inline TADDR GetFP(const T_CONTEXT * context) void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target); #endif // FEATURE_COMINTEROP +inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) +{ +#ifdef CROSSGEN_COMPILE + // The code won't be executed when we are cross-compiling so flush instruction cache is unnecessary + return TRUE; +#else + return FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode); +#endif +} + //------------------------------------------------------------------------ inline void emitJump(UINT32* pCode, LPVOID target) { @@ -196,7 +206,11 @@ inline void emitJump(UINT32* pCode, LPVOID target) pCode[0] = 0x58000050UL; // ldr x16, [pc, #8] pCode[1] = 0xD61F0200UL; // br x16 + // Ensure that the updated 
instructions get updated in the I-Cache + ClrFlushInstructionCache(pCode, 8); + *((LPVOID *)(pCode + 2)) = target; // 64-bit target address + } //------------------------------------------------------------------------ @@ -436,15 +450,6 @@ struct HijackArgs // ARM64:NYI }; -inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) -{ -#ifdef CROSSGEN_COMPILE - // The code won't be executed when we are cross-compiling so flush instruction cache is unnecessary - return TRUE; -#else - return FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode); -#endif -} EXTERN_C VOID STDCALL PrecodeFixupThunk(); // Invalid precode type diff --git a/src/vm/arm64/crthelpers.asm b/src/vm/arm64/crthelpers.asm index d1ee84abd7..f9f516dbf4 100644 --- a/src/vm/arm64/crthelpers.asm +++ b/src/vm/arm64/crthelpers.asm @@ -55,7 +55,7 @@ ; } ; } ; -; while(count > 8) +; while(count >= 8) ; { ; *(uintptr_t*)dst = valEx; ; dst = (uintptr_t*)dst + 1; @@ -124,11 +124,9 @@ JIT_MemSet_0x60 cbnz x2,JIT_MemSet_0x60 JIT_MemSet_0x7c cmp x2,#8 - bls JIT_MemSet_0xb8 - mov x8,#-9 - add x8,x2,x8 - lsr x8,x8,#3 - add x11,x8,#1 + blo JIT_MemSet_0xb8 + lsr x8,x2,#3 + mov x11,x8 mov x10,x0 add x8,x10,x11 lsl #3 JIT_MemSet_0x9c @@ -195,7 +193,7 @@ JIT_MemSet_0xd8 ; } ; } ; -; while(count > 8) +; while(count >= 8) ; { ; *(uintptr_t*)dst = *(uintptr_t*)src; ; dst = (uintptr_t*)dst + 1; @@ -264,11 +262,8 @@ JIT_MemCpy_0x5c cbnz x2,JIT_MemCpy_0x5c JIT_MemCpy_0x80 cmp x2,#8 - bls JIT_MemCpy_0xb4 - mov x8,#-9 - add x8,x2,x8 - lsr x8,x8,#3 - add x9,x8,#1 + blo JIT_MemCpy_0xb4 + lsr x9,x2,#3 mov x8,#-8 madd x2,x9,x8,x2 JIT_MemCpy_0xa0 diff --git a/src/vm/arm64/virtualcallstubcpu.hpp b/src/vm/arm64/virtualcallstubcpu.hpp index c302cee9b7..607912409d 100644 --- a/src/vm/arm64/virtualcallstubcpu.hpp +++ b/src/vm/arm64/virtualcallstubcpu.hpp @@ -136,8 +136,8 @@ private: struct ResolveStub { inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; } - inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; } - inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; } + inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; } + inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; } inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; } @@ -147,7 +147,7 @@ struct ResolveStub private: friend struct ResolveHolder; - const static int resolveEntryPointLen = 19; + const static int resolveEntryPointLen = 17; const static int slowEntryPointLen = 4; const static int failEntryPointLen = 8; @@ -155,7 +155,7 @@ private: DWORD _slowEntryPoint[slowEntryPointLen]; DWORD _failEntryPoint[failEntryPointLen]; INT32* _pCounter; //Base of the Data Region - size_t _cacheAddress; // lookupCache + size_t _cacheAddress; // lookupCache size_t _token; PCODE _resolveWorkerTarget; UINT32 _hashedToken; @@ -173,13 +173,12 @@ struct ResolveHolder DWORD offset; int br_nextEntry[2]; /******** Rough Convention of used in this routine - ;;x9 current variable + ;;x9 hash scratch / current ResolveCacheElem ;;x10 base address of the data region ;;x11 indirection cell - ;;x12 passed MT - ;;X13 data read from data region - ;;X14 computed address into data region - ;;X15 this._token + ;;x12 MethodTable (from object ref in x0), out: this._token + ;;X13 temp + ;;X15 temp, this._token 
;;cachemask => [CALL_STUB_CACHE_MASK * sizeof(void*)] *********/ // Called directly by JITTED code @@ -188,12 +187,16 @@ struct ResolveHolder // MethodTable mt = x0.m_pMethTab; // int i = ((mt + mt >> 12) ^ this._hashedToken) & _cacheMask // ResolveCacheElem e = this._cacheAddress + i - // do + // x9 = e = this._cacheAddress + i + // if (mt == e.pMT && this._token == e.token) // { - // if (mt == e.pMT && this._token == e.token) (e.target)(x0, x1,...,x7); - // e = e.pNext; - // } while (e != null) - // (this._slowEntryPoint)(x0, x1,.., x7, x11); + // (e.target)(x0, [x1,...,x7]); + // } + // else + // { + // x12 = this._token; + // (this._slowEntryPoint)(x0, [x1,.., x7], x9, x11, x12); + // } // } // @@ -236,16 +239,11 @@ struct ResolveHolder //ldr x9, [x13, x9] ;; x9 = e = this._cacheAddress + i _stub._resolveEntryPoint[n++] = 0xF86969A9 ; - //hoisting loop invariant this._token F940014F - // //ldr x15, [x10 + DATA_OFFSET(_token)] offset = DATA_OFFSET(_token); _ASSERTE(offset >=0 && offset%8 == 0); _stub._resolveEntryPoint[n++] = 0xF940014F | offset<<7; - int loop=n; - //do { - //;; Check mt == e.pMT // // @@ -292,26 +290,14 @@ struct ResolveHolder { _stub._resolveEntryPoint[i] = 0x54000001 | ((((n-i)*sizeof(DWORD))<<3) & 0x3FFFFFF); } - - //;; e = e.pNext; - //ldr x9, [x9, #offsetof(ResolveCacheElem,pNext ) ] - offset = offsetof(ResolveCacheElem, pNext) & 0xffffffff; - _ASSERTE(offset >=0 && offset%8 == 0); - _stub._resolveEntryPoint[n++] = 0xF9400129 | offset<<7; - - //;; } while(e != null); - //;; cbnz x9, loop - offset = (DWORD)(loop - n)*sizeof(DWORD); - _stub._resolveEntryPoint[n++] = 0xB5000009 | ((offset<<3) & 0xFFFFF0); - _ASSERTE(n == ResolveStub::resolveEntryPointLen); _ASSERTE(_stub._resolveEntryPoint + n == _stub._slowEntryPoint); - // ResolveStub._slowEntryPoint(x0:MethodToken, x1..x7, x11:IndirectionCellAndFlags) + // ResolveStub._slowEntryPoint(x0:MethodToken, [x1..x7], x11:IndirectionCellAndFlags) // { // x12 = this._token; - // this._resolveWorkerTarget(x0,.., x7, x12); + // this._resolveWorkerTarget(x0, [x1..x7], x9, x11, x12); // } #undef PC_REL_OFFSET @@ -326,29 +312,28 @@ struct ResolveHolder //ldr x12, [x10 , DATA_OFFSET(_token)] offset=DATA_OFFSET(_token); _ASSERTE(offset >=0 && offset%8 == 0); - _stub._slowEntryPoint[n++] = 0xF940014C | (offset<<7); + _stub._slowEntryPoint[n++] = 0xF940014C | (offset<<7); // - //ldr x9, [x10 , DATA_OFFSET(_resolveWorkerTarget)] + //ldr x13, [x10 , DATA_OFFSET(_resolveWorkerTarget)] offset=DATA_OFFSET(_resolveWorkerTarget); _ASSERTE(offset >=0 && offset%8 == 0); - _stub._slowEntryPoint[n++] = 0xF9400149 | (offset<<7); + _stub._slowEntryPoint[n++] = 0xF940014d | (offset<<7); - // br x9 - _stub._slowEntryPoint[n++] = 0xD61F0120; + // br x13 + _stub._slowEntryPoint[n++] = 0xD61F01A0; _ASSERTE(n == ResolveStub::slowEntryPointLen); // ResolveStub._failEntryPoint(x0:MethodToken, x1,.., x7, x11:IndirectionCellAndFlags) // { // if(--*(this._pCounter) < 0) x11 = x11 | SDF_ResolveBackPatch; - // this._resolveEntryPoint(x0,..,x7); + // this._resolveEntryPoint(x0, [x1..x7]); // } #undef PC_REL_OFFSET //NOTE Offset can be negative #define PC_REL_OFFSET(_field) (DWORD)((offsetof(ResolveStub, _field) - (offsetof(ResolveStub, _failEntryPoint[n]))) & 0xffffffff) n = 0; - //;;failEntryPoint //;;adr x10, #Dataregionbase _stub._failEntryPoint[n++] = 0x1000000A | ARM64EncodeHelpers::ADR_PATCH(PC_REL_OFFSET(Dataregionbase)); @@ -359,12 +344,12 @@ struct ResolveHolder _ASSERTE(offset >=0 && offset%8 == 0); _stub._failEntryPoint[n++] = 0xF940014D | 
offset<<7; - //ldr x9, [x13] - _stub._failEntryPoint[n++] = 0xF94001A9; - //subs x9,x9,#1 - _stub._failEntryPoint[n++] = 0xF1000529; - //str x9, [x13] - _stub._failEntryPoint[n++] = 0xF90001A9; + //ldr w9, [x13] + _stub._failEntryPoint[n++] = 0xB94001A9; + //subs w9,w9,#1 + _stub._failEntryPoint[n++] = 0x71000529; + //str w9, [x13] + _stub._failEntryPoint[n++] = 0xB90001A9; //;;bge resolveEntryPoint offset = PC_REL_OFFSET(_resolveEntryPoint); diff --git a/src/vm/ecalllist.h b/src/vm/ecalllist.h index 1266615ab7..0c8b063154 100644 --- a/src/vm/ecalllist.h +++ b/src/vm/ecalllist.h @@ -1256,24 +1256,24 @@ FCFuncStart(gMathFuncs) FCIntrinsic("Round", COMDouble::Round, CORINFO_INTRINSIC_Round) FCIntrinsicSig("Abs", &gsig_SM_Flt_RetFlt, COMDouble::AbsFlt, CORINFO_INTRINSIC_Abs) FCIntrinsicSig("Abs", &gsig_SM_Dbl_RetDbl, COMDouble::AbsDbl, CORINFO_INTRINSIC_Abs) - FCFuncElement("Exp", COMDouble::Exp) - FCFuncElement("Pow", COMDouble::Pow) + FCIntrinsic("Exp", COMDouble::Exp, CORINFO_INTRINSIC_Exp) + FCIntrinsic("Pow", COMDouble::Pow, CORINFO_INTRINSIC_Pow) #if defined(_TARGET_X86_) FCUnreferenced FCFuncElement("PowHelperSimple", COMDouble::PowHelperSimple) FCUnreferenced FCFuncElement("PowHelper", COMDouble::PowHelper) #endif - FCFuncElement("Tan", COMDouble::Tan) - FCFuncElement("Floor", COMDouble::Floor) + FCIntrinsic("Tan", COMDouble::Tan, CORINFO_INTRINSIC_Tan) + FCIntrinsic("Floor", COMDouble::Floor, CORINFO_INTRINSIC_Floor) FCFuncElement("Log", COMDouble::Log) - FCFuncElement("Sinh", COMDouble::Sinh) - FCFuncElement("Cosh", COMDouble::Cosh) - FCFuncElement("Tanh", COMDouble::Tanh) - FCFuncElement("Acos", COMDouble::Acos) - FCFuncElement("Asin", COMDouble::Asin) - FCFuncElement("Atan", COMDouble::Atan) - FCFuncElement("Atan2", COMDouble::Atan2) - FCFuncElement("Log10", COMDouble::Log10) - FCFuncElement("Ceiling", COMDouble::Ceil) + FCIntrinsic("Sinh", COMDouble::Sinh, CORINFO_INTRINSIC_Sinh) + FCIntrinsic("Cosh", COMDouble::Cosh, CORINFO_INTRINSIC_Cosh) + FCIntrinsic("Tanh", COMDouble::Tanh, CORINFO_INTRINSIC_Tanh) + FCIntrinsic("Acos", COMDouble::Acos, CORINFO_INTRINSIC_Acos) + FCIntrinsic("Asin", COMDouble::Asin, CORINFO_INTRINSIC_Asin) + FCIntrinsic("Atan", COMDouble::Atan, CORINFO_INTRINSIC_Atan) + FCIntrinsic("Atan2", COMDouble::Atan2, CORINFO_INTRINSIC_Atan2) + FCIntrinsic("Log10", COMDouble::Log10, CORINFO_INTRINSIC_Log10) + FCIntrinsic("Ceiling", COMDouble::Ceil, CORINFO_INTRINSIC_Ceiling) FCFuncElement("SplitFractionDouble", COMDouble::ModFDouble) FCFuncEnd() @@ -1871,9 +1871,9 @@ FCFuncStart(gCompilerFuncs) FCFuncElement("ExecuteCodeWithGuaranteedCleanup", ReflectionInvocation::ExecuteCodeWithGuaranteedCleanup) FCFuncElement("GetHashCode", ObjectNative::GetHashCode) FCFuncElement("Equals", ObjectNative::Equals) - FCFuncElement("EnsureSufficientExecutionStack", ReflectionInvocation::EnsureSufficientExecutionStack) + FCFuncElement("EnsureSufficientExecutionStack", ReflectionInvocation::EnsureSufficientExecutionStack) #ifdef FEATURE_CORECLR - FCFuncElement("TryEnsureSufficientExecutionStack", ReflectionInvocation::TryEnsureSufficientExecutionStack) + FCFuncElement("TryEnsureSufficientExecutionStack", ReflectionInvocation::TryEnsureSufficientExecutionStack) #endif // FEATURE_CORECLR FCFuncEnd() @@ -2017,7 +2017,7 @@ FCFuncStart(gStubHelperFuncs) FCFuncElement("MarshalToManagedVaListInternal", StubHelpers::MarshalToManagedVaListInternal) FCFuncElement("CalcVaListSize", StubHelpers::CalcVaListSize) FCFuncElement("ValidateObject", StubHelpers::ValidateObject) - 
FCFuncElement("ValidateByref", StubHelpers::ValidateByref) + FCFuncElement("ValidateByref", StubHelpers::ValidateByref) FCFuncElement("LogPinnedArgument", StubHelpers::LogPinnedArgument) FCIntrinsic("GetStubContext", StubHelpers::GetStubContext, CORINFO_INTRINSIC_StubHelpers_GetStubContext) #ifdef _WIN64 diff --git a/src/vm/gccover.cpp b/src/vm/gccover.cpp index 788f6a044d..8a591d9faa 100644 --- a/src/vm/gccover.cpp +++ b/src/vm/gccover.cpp @@ -1348,7 +1348,7 @@ void DoGcStress (PCONTEXT regs, MethodDesc *pMD) // If some other thread removes interrupt points, we abandon epilog testing // for this routine since the barrier at the begining of the routine may not - // be up anymore, and thus the caller context is now not guarenteed to be correct. + // be up anymore, and thus the caller context is now not guaranteed to be correct. // This should happen only very rarely so is not a big deal. if (gcCover->callerThread != pThread) gcCover->doingEpilogChecks = false; diff --git a/src/vm/virtualcallstub.cpp b/src/vm/virtualcallstub.cpp index a0672c0027..1d5b3abb17 100644 --- a/src/vm/virtualcallstub.cpp +++ b/src/vm/virtualcallstub.cpp @@ -83,7 +83,11 @@ UINT32 g_bucket_space_dead = 0; //# of bytes of abandoned buckets not ye // This is the number of times a successful chain lookup will occur before the // entry is promoted to the front of the chain. This is declared as extern because // the default value (CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT) is defined in the header. +#ifdef _TARGET_ARM64_ +extern "C" size_t g_dispatch_cache_chain_success_counter; +#else extern size_t g_dispatch_cache_chain_success_counter; +#endif #define DECLARE_DATA #include "virtualcallstub.h" |