Diffstat (limited to 'src/jit/compiler.cpp')
-rw-r--r--  src/jit/compiler.cpp  1399
1 file changed, 997 insertions, 402 deletions
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index afbecdfc60..114847c0d0 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -48,6 +48,60 @@ bool Compiler::s_pAltJitExcludeAssembliesListInitialized = false;
AssemblyNamesList2* Compiler::s_pAltJitExcludeAssembliesList = nullptr;
#endif // ALT_JIT
+/*****************************************************************************
+ *
+ * Little helpers to grab the current cycle counter value; this is done
+ * differently based on target architecture, host toolchain, etc. The
+ * main thing is to keep the overhead absolutely minimal; in fact, on
+ * x86/x64 we use RDTSC even though it's not thread-safe; GetThreadCycles
+ * (which is monotonic) is just too expensive.
+ */
+#ifdef FEATURE_JIT_METHOD_PERF
+
+#if defined(_HOST_X86_) || defined(_HOST_AMD64_)
+
+#if defined(_MSC_VER)
+
+#include <intrin.h>
+inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
+{
+ *cycleOut = __rdtsc();
+ return true;
+}
+
+#elif defined(__clang__)
+
+inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
+{
+    // Read the counter via explicit eax/edx outputs; the "=A" constraint
+    // names the edx:eax pair only on 32-bit targets, so this form works for
+    // both _HOST_X86_ and _HOST_AMD64_.
+    unsigned int lo, hi;
+    asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+    *cycleOut = (((unsigned __int64)hi) << 32) | lo;
+    return true;
+}
+
+#else // neither _MSC_VER nor __clang__
+
+// The following *might* work - might as well try.
+#define _our_GetThreadCycles(cp) GetThreadCycles(cp)
+
+#endif
+
+#elif defined(_HOST_ARM_) || defined(_HOST_ARM64_)
+
+// If this doesn't work please see ../gc/gc.cpp for additional ARM
+// info (and possible solutions).
+#define _our_GetThreadCycles(cp) GetThreadCycles(cp)
+
+#else // not x86/x64 and not ARM
+
+// Don't know what this target is, but let's give it a try; if
+// someone really wants to make this work, please add the right
+// code here.
+#define _our_GetThreadCycles(cp) GetThreadCycles(cp)
+
+#endif // which host architecture
+
+#endif // FEATURE_JIT_METHOD_PERF
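// Illustrative sketch (not part of this change): the helpers above are meant
// to be consumed by bracketing a region of interest and accumulating the
// delta. "DoPhaseWork" and "phaseCycles" below are hypothetical names used
// only for illustration.
//
//     unsigned __int64 start;
//     unsigned __int64 stop;
//     if (_our_GetThreadCycles(&start))
//     {
//         DoPhaseWork();
//         if (_our_GetThreadCycles(&stop))
//         {
//             phaseCycles += (stop - start);
//         }
//     }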
/*****************************************************************************/
inline unsigned getCurTime()
{
@@ -147,8 +201,6 @@ void Compiler::compDspSrcLinesByLineNum(unsigned line, bool seek)
void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP)
{
-#ifdef DEBUGGING_SUPPORT
-
static IPmappingDsc* nextMappingDsc;
static unsigned lastLine;
@@ -203,8 +255,6 @@ void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP)
nextMappingDsc = nextMappingDsc->ipmdNext;
}
}
-
-#endif
}
/*****************************************************************************/
@@ -232,6 +282,15 @@ unsigned genTreeNsizHistBuckets[] = {1000, 5000, 10000, 50000, 100000, 500000,
Histogram genTreeNsizHist(HostAllocator::getHostAllocator(), genTreeNsizHistBuckets);
#endif // MEASURE_NODE_SIZE
+/*****************************************************************************/
+#if MEASURE_MEM_ALLOC
+
+unsigned memSizeHistBuckets[] = {20, 50, 75, 100, 150, 250, 500, 1000, 5000, 0};
+Histogram memAllocHist(HostAllocator::getHostAllocator(), memSizeHistBuckets);
+Histogram memUsedHist(HostAllocator::getHostAllocator(), memSizeHistBuckets);
+
+#endif // MEASURE_MEM_ALLOC
+
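// Illustrative sketch (not part of this change): these histograms are fed
// once per method, by rounding the arena byte totals up to whole KB. The
// same expressions are added to compCompileFinish() later in this diff.
//
//     memAllocHist.record((unsigned)((genMemStats.nraTotalSizeAlloc + 1023) / 1024));
//     memUsedHist.record((unsigned)((genMemStats.nraTotalSizeUsed + 1023) / 1024));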
/*****************************************************************************
*
* Variables to keep track of total code amounts.
@@ -475,7 +534,7 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd)
for (;;)
{
// all of class chain must be of value type and must have only one field
- if (!info.compCompHnd->isValueClass(clsHnd) && info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1)
+ if (!info.compCompHnd->isValueClass(clsHnd) || info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1)
{
return false;
}
@@ -1101,14 +1160,11 @@ size_t genFlowNodeCnt;
#ifdef DEBUG
/* static */
unsigned Compiler::s_compMethodsCount = 0; // to produce unique label names
-
-/* static */
-bool Compiler::s_dspMemStats = false;
#endif
-#ifndef DEBUGGING_SUPPORT
+#if MEASURE_MEM_ALLOC
/* static */
-const bool Compiler::Options::compDbgCode = false;
+bool Compiler::s_dspMemStats = false;
#endif
#ifndef PROFILING_SUPPORTED
@@ -1184,18 +1240,22 @@ void Compiler::compShutdown()
}
#endif
+#if NODEBASH_STATS
+ GenTree::ReportOperBashing(jitstdout);
+#endif
+
// Where should we write our statistics output?
FILE* fout = jitstdout;
#ifdef FEATURE_JIT_METHOD_PERF
- if (compJitTimeLogFilename != NULL)
+ if (compJitTimeLogFilename != nullptr)
{
- // I assume that this will return NULL if it fails for some reason, and
- // that...
FILE* jitTimeLogFile = _wfopen(compJitTimeLogFilename, W("a"));
- // ...Print will return silently with a NULL argument.
- CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile);
- fclose(jitTimeLogFile);
+ if (jitTimeLogFile != nullptr)
+ {
+ CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile);
+ fclose(jitTimeLogFile);
+ }
}
#endif // FEATURE_JIT_METHOD_PERF
@@ -1214,6 +1274,63 @@ void Compiler::compShutdown()
}
#endif // COUNT_RANGECHECKS
+#if COUNT_AST_OPERS
+
+ // Add up all the counts so that we can show percentages of total
+ unsigned gtc = 0;
+ for (unsigned op = 0; op < GT_COUNT; op++)
+ gtc += GenTree::s_gtNodeCounts[op];
+
+ if (gtc > 0)
+ {
+ unsigned rem_total = gtc;
+ unsigned rem_large = 0;
+ unsigned rem_small = 0;
+
+ unsigned tot_large = 0;
+ unsigned tot_small = 0;
+
+ fprintf(fout, "\nGenTree operator counts (approximate):\n\n");
+
+ for (unsigned op = 0; op < GT_COUNT; op++)
+ {
+ unsigned siz = GenTree::s_gtTrueSizes[op];
+ unsigned cnt = GenTree::s_gtNodeCounts[op];
+ double pct = 100.0 * cnt / gtc;
+
+ if (siz > TREE_NODE_SZ_SMALL)
+ tot_large += cnt;
+ else
+ tot_small += cnt;
+
+        // Let's not show anything below a 0.5% threshold
+ if (pct >= 0.5)
+ {
+ fprintf(fout, " GT_%-17s %7u (%4.1lf%%) %3u bytes each\n", GenTree::OpName((genTreeOps)op), cnt,
+ pct, siz);
+ rem_total -= cnt;
+ }
+ else
+ {
+ if (siz > TREE_NODE_SZ_SMALL)
+ rem_large += cnt;
+ else
+ rem_small += cnt;
+ }
+ }
+ if (rem_total > 0)
+ {
+ fprintf(fout, " All other GT_xxx ... %7u (%4.1lf%%) ... %4.1lf%% small + %4.1lf%% large\n", rem_total,
+ 100.0 * rem_total / gtc, 100.0 * rem_small / gtc, 100.0 * rem_large / gtc);
+ }
+ fprintf(fout, " -----------------------------------------------------\n");
+ fprintf(fout, " Total ....... %11u --ALL-- ... %4.1lf%% small + %4.1lf%% large\n", gtc,
+ 100.0 * tot_small / gtc, 100.0 * tot_large / gtc);
+ fprintf(fout, "\n");
+ }
+
+#endif // COUNT_AST_OPERS
+
#if DISPLAY_SIZES
if (grossVMsize && grossNCsize)
@@ -1367,17 +1484,23 @@ void Compiler::compShutdown()
#if MEASURE_MEM_ALLOC
-#ifdef DEBUG
- // Under debug, we only dump memory stats when the COMPlus_* variable is defined.
- // Under non-debug, we don't have the COMPlus_* variable, and we always dump it.
if (s_dspMemStats)
-#endif
{
fprintf(fout, "\nAll allocations:\n");
s_aggMemStats.Print(jitstdout);
fprintf(fout, "\nLargest method:\n");
s_maxCompMemStats.Print(jitstdout);
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Distribution of total memory allocated per method (in KB):\n");
+ memAllocHist.dump(fout);
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Distribution of total memory used per method (in KB):\n");
+ memUsedHist.dump(fout);
}
#endif // MEASURE_MEM_ALLOC
@@ -1452,100 +1575,8 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
{
#if MEASURE_NODE_SIZE
- /*
- IMPORTANT: Use the following code to check the alignment of
- GenTree members (in a retail build, of course).
- */
-
- GenTree* gtDummy = nullptr;
-
- fprintf(fout, "\n");
- fprintf(fout, "Offset / size of gtOper = %2u / %2u\n", offsetof(GenTree, gtOper), sizeof(gtDummy->gtOper));
- fprintf(fout, "Offset / size of gtType = %2u / %2u\n", offsetof(GenTree, gtType), sizeof(gtDummy->gtType));
-#if FEATURE_ANYCSE
- fprintf(fout, "Offset / size of gtCSEnum = %2u / %2u\n", offsetof(GenTree, gtCSEnum),
- sizeof(gtDummy->gtCSEnum));
-#endif // FEATURE_ANYCSE
-#if ASSERTION_PROP
- fprintf(fout, "Offset / size of gtAssertionNum = %2u / %2u\n", offsetof(GenTree, gtAssertionNum),
- sizeof(gtDummy->gtAssertionNum));
-#endif // ASSERTION_PROP
-#if FEATURE_STACK_FP_X87
- fprintf(fout, "Offset / size of gtFPlvl = %2u / %2u\n", offsetof(GenTree, gtFPlvl),
- sizeof(gtDummy->gtFPlvl));
-#endif // FEATURE_STACK_FP_X87
- // TODO: The section that report GenTree sizes should be made into a public static member function of the GenTree
- // class (see https://github.com/dotnet/coreclr/pull/493)
- // fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ),
- // sizeof(gtDummy->_gtCostEx ));
- // fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ),
- // sizeof(gtDummy->_gtCostSz ));
- fprintf(fout, "Offset / size of gtFlags = %2u / %2u\n", offsetof(GenTree, gtFlags),
- sizeof(gtDummy->gtFlags));
- fprintf(fout, "Offset / size of gtVNPair = %2u / %2u\n", offsetof(GenTree, gtVNPair),
- sizeof(gtDummy->gtVNPair));
- fprintf(fout, "Offset / size of gtRsvdRegs = %2u / %2u\n", offsetof(GenTree, gtRsvdRegs),
- sizeof(gtDummy->gtRsvdRegs));
-#ifdef LEGACY_BACKEND
- fprintf(fout, "Offset / size of gtUsedRegs = %2u / %2u\n", offsetof(GenTree, gtUsedRegs),
- sizeof(gtDummy->gtUsedRegs));
-#endif // LEGACY_BACKEND
-#ifndef LEGACY_BACKEND
- fprintf(fout, "Offset / size of gtLsraInfo = %2u / %2u\n", offsetof(GenTree, gtLsraInfo),
- sizeof(gtDummy->gtLsraInfo));
-#endif // !LEGACY_BACKEND
- fprintf(fout, "Offset / size of gtNext = %2u / %2u\n", offsetof(GenTree, gtNext), sizeof(gtDummy->gtNext));
- fprintf(fout, "Offset / size of gtPrev = %2u / %2u\n", offsetof(GenTree, gtPrev), sizeof(gtDummy->gtPrev));
- fprintf(fout, "\n");
-
-#if SMALL_TREE_NODES
- fprintf(fout, "Small tree node size = %3u\n", TREE_NODE_SZ_SMALL);
-#endif // SMALL_TREE_NODES
- fprintf(fout, "Large tree node size = %3u\n", TREE_NODE_SZ_LARGE);
- fprintf(fout, "Size of GenTree = %3u\n", sizeof(GenTree));
- fprintf(fout, "Size of GenTreeUnOp = %3u\n", sizeof(GenTreeUnOp));
- fprintf(fout, "Size of GenTreeOp = %3u\n", sizeof(GenTreeOp));
- fprintf(fout, "Size of GenTreeVal = %3u\n", sizeof(GenTreeVal));
- fprintf(fout, "Size of GenTreeIntConCommon = %3u\n", sizeof(GenTreeIntConCommon));
- fprintf(fout, "Size of GenTreePhysReg = %3u\n", sizeof(GenTreePhysReg));
-#ifndef LEGACY_BACKEND
- fprintf(fout, "Size of GenTreeJumpTable = %3u\n", sizeof(GenTreeJumpTable));
-#endif // !LEGACY_BACKEND
- fprintf(fout, "Size of GenTreeIntCon = %3u\n", sizeof(GenTreeIntCon));
- fprintf(fout, "Size of GenTreeLngCon = %3u\n", sizeof(GenTreeLngCon));
- fprintf(fout, "Size of GenTreeDblCon = %3u\n", sizeof(GenTreeDblCon));
- fprintf(fout, "Size of GenTreeStrCon = %3u\n", sizeof(GenTreeStrCon));
- fprintf(fout, "Size of GenTreeLclVarCommon = %3u\n", sizeof(GenTreeLclVarCommon));
- fprintf(fout, "Size of GenTreeLclVar = %3u\n", sizeof(GenTreeLclVar));
- fprintf(fout, "Size of GenTreeLclFld = %3u\n", sizeof(GenTreeLclFld));
- fprintf(fout, "Size of GenTreeRegVar = %3u\n", sizeof(GenTreeRegVar));
- fprintf(fout, "Size of GenTreeCast = %3u\n", sizeof(GenTreeCast));
- fprintf(fout, "Size of GenTreeBox = %3u\n", sizeof(GenTreeBox));
- fprintf(fout, "Size of GenTreeField = %3u\n", sizeof(GenTreeField));
- fprintf(fout, "Size of GenTreeArgList = %3u\n", sizeof(GenTreeArgList));
- fprintf(fout, "Size of GenTreeColon = %3u\n", sizeof(GenTreeColon));
- fprintf(fout, "Size of GenTreeCall = %3u\n", sizeof(GenTreeCall));
- fprintf(fout, "Size of GenTreeCmpXchg = %3u\n", sizeof(GenTreeCmpXchg));
- fprintf(fout, "Size of GenTreeFptrVal = %3u\n", sizeof(GenTreeFptrVal));
- fprintf(fout, "Size of GenTreeQmark = %3u\n", sizeof(GenTreeQmark));
- fprintf(fout, "Size of GenTreeIntrinsic = %3u\n", sizeof(GenTreeIntrinsic));
- fprintf(fout, "Size of GenTreeIndex = %3u\n", sizeof(GenTreeIndex));
- fprintf(fout, "Size of GenTreeArrLen = %3u\n", sizeof(GenTreeArrLen));
- fprintf(fout, "Size of GenTreeBoundsChk = %3u\n", sizeof(GenTreeBoundsChk));
- fprintf(fout, "Size of GenTreeArrElem = %3u\n", sizeof(GenTreeArrElem));
- fprintf(fout, "Size of GenTreeAddrMode = %3u\n", sizeof(GenTreeAddrMode));
- fprintf(fout, "Size of GenTreeIndir = %3u\n", sizeof(GenTreeIndir));
- fprintf(fout, "Size of GenTreeStoreInd = %3u\n", sizeof(GenTreeStoreInd));
- fprintf(fout, "Size of GenTreeRetExpr = %3u\n", sizeof(GenTreeRetExpr));
- fprintf(fout, "Size of GenTreeStmt = %3u\n", sizeof(GenTreeStmt));
- fprintf(fout, "Size of GenTreeObj = %3u\n", sizeof(GenTreeObj));
- fprintf(fout, "Size of GenTreeClsVar = %3u\n", sizeof(GenTreeClsVar));
- fprintf(fout, "Size of GenTreeArgPlace = %3u\n", sizeof(GenTreeArgPlace));
- fprintf(fout, "Size of GenTreeLabel = %3u\n", sizeof(GenTreeLabel));
- fprintf(fout, "Size of GenTreePhiArg = %3u\n", sizeof(GenTreePhiArg));
- fprintf(fout, "Size of GenTreePutArgStk = %3u\n", sizeof(GenTreePutArgStk));
- fprintf(fout, "\n");
-#endif // MEASURE_NODE_SIZE
+ GenTree::DumpNodeSizes(fout);
+#endif
#if MEASURE_BLOCK_SIZE
@@ -1572,8 +1603,6 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
sizeof(bbDummy->bbJumpDest));
fprintf(fout, "Offset / size of bbJumpSwt = %3u / %3u\n", offsetof(BasicBlock, bbJumpSwt),
sizeof(bbDummy->bbJumpSwt));
- fprintf(fout, "Offset / size of bbTreeList = %3u / %3u\n", offsetof(BasicBlock, bbTreeList),
- sizeof(bbDummy->bbTreeList));
fprintf(fout, "Offset / size of bbEntryState = %3u / %3u\n", offsetof(BasicBlock, bbEntryState),
sizeof(bbDummy->bbEntryState));
fprintf(fout, "Offset / size of bbStkTempsIn = %3u / %3u\n", offsetof(BasicBlock, bbStkTempsIn),
@@ -1618,12 +1647,8 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
sizeof(bbDummy->bbHeapSsaNumIn));
fprintf(fout, "Offset / size of bbHeapSsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumOut),
sizeof(bbDummy->bbHeapSsaNumOut));
-
-#ifdef DEBUGGING_SUPPORT
fprintf(fout, "Offset / size of bbScope = %3u / %3u\n", offsetof(BasicBlock, bbScope),
sizeof(bbDummy->bbScope));
-#endif // DEBUGGING_SUPPORT
-
fprintf(fout, "Offset / size of bbCseGen = %3u / %3u\n", offsetof(BasicBlock, bbCseGen),
sizeof(bbDummy->bbCseGen));
fprintf(fout, "Offset / size of bbCseIn = %3u / %3u\n", offsetof(BasicBlock, bbCseIn),
@@ -1888,10 +1913,6 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
SIMDVectorHandle = nullptr;
#endif
-#ifdef DEBUG
- inlRNG = nullptr;
-#endif
-
compUsesThrowHelper = false;
}
@@ -2244,14 +2265,14 @@ const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs)
void Compiler::compSetProcessor()
{
- unsigned compileFlags = opts.eeFlags;
+ const JitFlags& jitFlags = *opts.jitFlags;
#if defined(_TARGET_ARM_)
info.genCPU = CPU_ARM;
#elif defined(_TARGET_AMD64_)
- info.genCPU = CPU_X64;
+ info.genCPU = CPU_X64;
#elif defined(_TARGET_X86_)
- if (compileFlags & CORJIT_FLG_TARGET_P4)
+ if (jitFlags.IsSet(JitFlags::JIT_FLAG_TARGET_P4))
info.genCPU = CPU_X86_PENTIUM_4;
else
info.genCPU = CPU_X86;
@@ -2262,33 +2283,66 @@ void Compiler::compSetProcessor()
//
CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef _TARGET_AMD64_
- opts.compUseFCOMI = false;
- opts.compUseCMOV = true;
- opts.compCanUseSSE2 = true;
+#ifdef _TARGET_XARCH_
+ opts.compCanUseSSE3_4 = false;
+ if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE3_4))
+ {
+ if (JitConfig.EnableSSE3_4() != 0)
+ {
+ opts.compCanUseSSE3_4 = true;
+ }
+ }
#ifdef FEATURE_AVX_SUPPORT
// COMPlus_EnableAVX can be used to disable using AVX if available on a target machine.
// Note that FEATURE_AVX_SUPPORT is not enabled for ctpjit
opts.compCanUseAVX = false;
- if (((compileFlags & CORJIT_FLG_PREJIT) == 0) && ((compileFlags & CORJIT_FLG_USE_AVX2) != 0))
+ if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
{
if (JitConfig.EnableAVX() != 0)
{
opts.compCanUseAVX = true;
- if (!compIsForInlining())
- {
- codeGen->getEmitter()->SetUseAVX(true);
- }
}
}
-#endif
-#endif //_TARGET_AMD64_
+#endif // FEATURE_AVX_SUPPORT
-#ifdef _TARGET_X86_
- opts.compUseFCOMI = ((opts.eeFlags & CORJIT_FLG_USE_FCOMI) != 0);
- opts.compUseCMOV = ((opts.eeFlags & CORJIT_FLG_USE_CMOV) != 0);
- opts.compCanUseSSE2 = ((opts.eeFlags & CORJIT_FLG_USE_SSE2) != 0);
+ if (!compIsForInlining())
+ {
+#ifdef FEATURE_AVX_SUPPORT
+ if (opts.compCanUseAVX)
+ {
+ codeGen->getEmitter()->SetUseAVX(true);
+ }
+ else
+#endif // FEATURE_AVX_SUPPORT
+ if (opts.compCanUseSSE3_4)
+ {
+ codeGen->getEmitter()->SetUseSSE3_4(true);
+ }
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_AMD64_
+ opts.compUseFCOMI = false;
+ opts.compUseCMOV = true;
+ opts.compCanUseSSE2 = true;
+#elif defined(_TARGET_X86_)
+ opts.compUseFCOMI = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_FCOMI);
+ opts.compUseCMOV = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_CMOV);
+ opts.compCanUseSSE2 = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE2);
+
+#if !defined(LEGACY_BACKEND) && !defined(FEATURE_CORECLR)
+ // RyuJIT/x86 requires SSE2 to be available: there is no support for generating floating-point
+ // code with x87 instructions. On .NET Core, the VM always tells us that SSE2 is available.
+ // However, on desktop, under ngen, (and presumably in the unlikely case you're actually
+ // running on a machine without SSE2), the VM does not set the SSE2 flag. We ignore this and
+ // go ahead and generate SSE2 code anyway.
+ if (!opts.compCanUseSSE2)
+ {
+ JITDUMP("VM didn't set CORJIT_FLG_USE_SSE2! Ignoring, and generating SSE2 code anyway.\n");
+ opts.compCanUseSSE2 = true;
+ }
+#endif // !defined(LEGACY_BACKEND) && !defined(FEATURE_CORECLR)
#ifdef DEBUG
if (opts.compUseFCOMI)
@@ -2296,7 +2350,9 @@ void Compiler::compSetProcessor()
if (opts.compUseCMOV)
opts.compUseCMOV = !compStressCompile(STRESS_USE_CMOV, 50);
- // Should we override the SSE2 setting
+#ifdef LEGACY_BACKEND
+
+ // Should we override the SSE2 setting?
enum
{
SSE2_FORCE_DISABLE = 0,
@@ -2310,7 +2366,17 @@ void Compiler::compSetProcessor()
opts.compCanUseSSE2 = true;
else if (opts.compCanUseSSE2)
opts.compCanUseSSE2 = !compStressCompile(STRESS_GENERIC_VARN, 50);
+
+#else // !LEGACY_BACKEND
+
+ // RyuJIT/x86 requires SSE2 to be available and hence
+ // don't turn off compCanUseSSE2 under stress.
+ assert(opts.compCanUseSSE2);
+
+#endif // !LEGACY_BACKEND
+
#endif // DEBUG
+
#endif // _TARGET_X86_
}
@@ -2378,31 +2444,36 @@ unsigned ReinterpretHexAsDecimal(unsigned in)
return result;
}
-void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
+void Compiler::compInitOptions(JitFlags* jitFlags)
{
#ifdef UNIX_AMD64_ABI
opts.compNeedToAlignFrame = false;
#endif // UNIX_AMD64_ABI
memset(&opts, 0, sizeof(opts));
- unsigned compileFlags = jitFlags->corJitFlags;
-
if (compIsForInlining())
{
- assert((compileFlags & CORJIT_FLG_LOST_WHEN_INLINING) == 0);
- assert(compileFlags & CORJIT_FLG_SKIP_VERIFICATION);
+ // The following flags are lost when inlining. (They are removed in
+ // Compiler::fgInvokeInlineeCompiler().)
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT));
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR));
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE));
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC));
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO));
+
+ assert(jitFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION));
}
opts.jitFlags = jitFlags;
- opts.eeFlags = compileFlags;
opts.compFlags = CLFLG_MAXOPT; // Default value is for full optimization
- if (opts.eeFlags & (CORJIT_FLG_DEBUG_CODE | CORJIT_FLG_MIN_OPT))
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE) || jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT))
{
opts.compFlags = CLFLG_MINOPT;
}
// Don't optimize .cctors (except prejit) or if we're an inlinee
- else if (!(opts.eeFlags & CORJIT_FLG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) && !compIsForInlining())
+ else if (!jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) &&
+ !compIsForInlining())
{
opts.compFlags = CLFLG_MINOPT;
}
@@ -2414,32 +2485,31 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
// If the EE sets SIZE_OPT or if we are compiling a Class constructor
// we will optimize for code size at the expense of speed
//
- if ((opts.eeFlags & CORJIT_FLG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
{
opts.compCodeOpt = SMALL_CODE;
}
//
// If the EE sets SPEED_OPT we will optimize for speed at the expense of code size
//
- else if (opts.eeFlags & CORJIT_FLG_SPEED_OPT)
+ else if (jitFlags->IsSet(JitFlags::JIT_FLAG_SPEED_OPT))
{
opts.compCodeOpt = FAST_CODE;
- assert((opts.eeFlags & CORJIT_FLG_SIZE_OPT) == 0);
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT));
}
-//-------------------------------------------------------------------------
+ //-------------------------------------------------------------------------
+
+ opts.compDbgCode = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE);
+ opts.compDbgInfo = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO);
+ opts.compDbgEnC = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC);
-#ifdef DEBUGGING_SUPPORT
- opts.compDbgCode = (opts.eeFlags & CORJIT_FLG_DEBUG_CODE) != 0;
- opts.compDbgInfo = (opts.eeFlags & CORJIT_FLG_DEBUG_INFO) != 0;
- opts.compDbgEnC = (opts.eeFlags & CORJIT_FLG_DEBUG_EnC) != 0;
#if REGEN_SHORTCUTS || REGEN_CALLPAT
// We never want to have debugging enabled when regenerating GC encoding patterns
opts.compDbgCode = false;
opts.compDbgInfo = false;
opts.compDbgEnC = false;
#endif
-#endif
compSetProcessor();
@@ -2473,7 +2543,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#ifdef DEBUG
const JitConfigValues::MethodSet* pfAltJit;
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
pfAltJit = &JitConfig.AltJitNgen();
}
@@ -2498,7 +2568,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#else // !DEBUG
const char* altJitVal;
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
altJitVal = JitConfig.AltJitNgen().list();
}
@@ -2602,7 +2672,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
//
if (!compIsForInlining())
{
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if (JitConfig.NgenDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
{
@@ -2952,10 +3022,8 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#endif // DEBUG
#ifdef FEATURE_SIMD
-#ifdef _TARGET_AMD64_
- // Minimum bar for availing SIMD benefits is SSE2 on AMD64.
- featureSIMD = ((opts.eeFlags & CORJIT_FLG_FEATURE_SIMD) != 0);
-#endif // _TARGET_AMD64_
+ // Minimum bar for availing SIMD benefits is SSE2 on AMD64/x86.
+ featureSIMD = jitFlags->IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD);
#endif // FEATURE_SIMD
if (compIsForInlining() || compIsForImportOnly())
@@ -2978,23 +3046,26 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
opts.compTailCallLoopOpt = true;
#endif
-#ifdef DEBUG
- opts.dspInstrs = false;
- opts.dspEmit = false;
- opts.dspLines = false;
- opts.varNames = false;
- opts.dmpHex = false;
- opts.disAsm = false;
- opts.disAsmSpilled = false;
- opts.disDiffable = false;
- opts.dspCode = false;
- opts.dspEHTable = false;
- opts.dspGCtbls = false;
- opts.disAsm2 = false;
- opts.dspUnwind = false;
- s_dspMemStats = false;
- opts.compLongAddress = false;
+#ifdef PROFILING_SUPPORTED
opts.compJitELTHookEnabled = false;
+#endif // PROFILING_SUPPORTED
+
+#ifdef DEBUG
+ opts.dspInstrs = false;
+ opts.dspEmit = false;
+ opts.dspLines = false;
+ opts.varNames = false;
+ opts.dmpHex = false;
+ opts.disAsm = false;
+ opts.disAsmSpilled = false;
+ opts.disDiffable = false;
+ opts.dspCode = false;
+ opts.dspEHTable = false;
+ opts.dspGCtbls = false;
+ opts.disAsm2 = false;
+ opts.dspUnwind = false;
+ opts.compLongAddress = false;
+ opts.optRepeat = false;
#ifdef LATE_DISASM
opts.doLateDisasm = false;
@@ -3007,7 +3078,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
//
if (!altJitConfig || opts.altJit)
{
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if ((JitConfig.NgenOrder() & 1) == 1)
{
@@ -3084,14 +3155,14 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
opts.dspDiffable = true;
}
- if (JitConfig.DisplayMemStats() != 0)
+ if (JitConfig.JitLongAddress() != 0)
{
- s_dspMemStats = true;
+ opts.compLongAddress = true;
}
- if (JitConfig.JitLongAddress() != 0)
+ if (JitConfig.JitOptRepeat().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
{
- opts.compLongAddress = true;
+ opts.optRepeat = true;
}
}
@@ -3152,7 +3223,6 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
//-------------------------------------------------------------------------
-#ifdef DEBUGGING_SUPPORT
#ifdef DEBUG
assert(!codeGen->isGCTypeFixed());
opts.compGcChecks = (JitConfig.JitGCChecks() != 0) || compStressCompile(STRESS_GENERIC_VARN, 5);
@@ -3173,11 +3243,15 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
opts.compStackCheckOnCall = (dwJitStackChecks & DWORD(STACK_CHECK_ON_CALL)) != 0;
#endif
+#if MEASURE_MEM_ALLOC
+ s_dspMemStats = (JitConfig.DisplayMemStats() != 0);
+#endif
+
#ifdef PROFILING_SUPPORTED
- opts.compNoPInvokeInlineCB = (opts.eeFlags & CORJIT_FLG_PROF_NO_PINVOKE_INLINE) ? true : false;
+ opts.compNoPInvokeInlineCB = jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_NO_PINVOKE_INLINE);
// Cache the profiler handle
- if (opts.eeFlags & CORJIT_FLG_PROF_ENTERLEAVE)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE))
{
BOOL hookNeeded;
BOOL indirected;
@@ -3192,11 +3266,8 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
compProfilerMethHndIndirected = false;
}
-#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
- // Right now this ELT hook option is enabled only for arm and amd64
-
- // Honour complus_JitELTHookEnabled only if VM has not asked us to generate profiler
- // hooks in the first place. That is, Override VM only if it hasn't asked for a
+ // Honour COMPlus_JitELTHookEnabled only if VM has not asked us to generate profiler
+ // hooks in the first place. That is, override VM only if it hasn't asked for a
// profiler callback for this method.
if (!compProfilerHookNeeded && (JitConfig.JitELTHookEnabled() != 0))
{
@@ -3209,7 +3280,6 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
compProfilerMethHnd = (void*)DummyProfilerELTStub;
compProfilerMethHndIndirected = false;
}
-#endif // _TARGET_ARM_ || _TARGET_AMD64_
#endif // PROFILING_SUPPORTED
@@ -3226,10 +3296,9 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
}
#endif
- opts.compMustInlinePInvokeCalli = (opts.eeFlags & CORJIT_FLG_IL_STUB) ? true : false;
+ opts.compMustInlinePInvokeCalli = jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB);
opts.compScopeInfo = opts.compDbgInfo;
-#endif // DEBUGGING_SUPPORT
#ifdef LATE_DISASM
codeGen->getDisAssembler().disOpenForLateDisAsm(info.compMethodName, info.compClassName,
@@ -3239,7 +3308,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
//-------------------------------------------------------------------------
#if RELOC_SUPPORT
- opts.compReloc = (opts.eeFlags & CORJIT_FLG_RELOC) ? true : false;
+ opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC);
#endif
#ifdef DEBUG
@@ -3249,7 +3318,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#endif
#endif // DEBUG
- opts.compProcedureSplitting = (opts.eeFlags & CORJIT_FLG_PROCSPLIT) ? true : false;
+ opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT);
#ifdef _TARGET_ARM64_
// TODO-ARM64-NYI: enable hot/cold splitting
@@ -3294,7 +3363,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
fgProfileBuffer = nullptr;
fgProfileData_ILSizeMismatch = false;
fgNumProfileRuns = 0;
- if (opts.eeFlags & CORJIT_FLG_BBOPT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
{
assert(!compIsForInlining());
HRESULT hr;
@@ -3365,7 +3434,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
printf("OPTIONS: compProcedureSplitting = %s\n", dspBool(opts.compProcedureSplitting));
printf("OPTIONS: compProcedureSplittingEH = %s\n", dspBool(opts.compProcedureSplittingEH));
- if ((opts.eeFlags & CORJIT_FLG_BBOPT) && fgHaveProfileData())
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData())
{
printf("OPTIONS: using real profile data\n");
}
@@ -3375,7 +3444,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
printf("OPTIONS: discarded IBC profile data due to mismatch in ILSize\n");
}
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
printf("OPTIONS: Jit invoked for ngen\n");
}
@@ -3384,11 +3453,11 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#endif
opts.compGCPollType = GCPOLL_NONE;
- if (opts.eeFlags & CORJIT_FLG_GCPOLL_CALLS)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_GCPOLL_CALLS))
{
opts.compGCPollType = GCPOLL_CALL;
}
- else if (opts.eeFlags & CORJIT_FLG_GCPOLL_INLINE)
+ else if (jitFlags->IsSet(JitFlags::JIT_FLAG_GCPOLL_INLINE))
{
// make sure that the EE didn't set both flags.
assert(opts.compGCPollType == GCPOLL_NONE);
@@ -3568,14 +3637,11 @@ void Compiler::compInitDebuggingInfo()
info.compVarScopesCount = 0;
-#ifdef DEBUGGING_SUPPORT
if (opts.compScopeInfo)
-#endif
{
eeGetVars();
}
-#ifdef DEBUGGING_SUPPORT
compInitVarScopeMap();
if (opts.compScopeInfo || opts.compDbgCode)
@@ -3598,7 +3664,6 @@ void Compiler::compInitDebuggingInfo()
JITDUMP("Debuggable code - Add new BB%02u to perform initialization of variables [%08X]\n", fgFirstBB->bbNum,
dspPtr(fgFirstBB));
}
-#endif // DEBUGGING_SUPPORT
/*-------------------------------------------------------------------------
*
@@ -3617,9 +3682,7 @@ void Compiler::compInitDebuggingInfo()
info.compStmtOffsetsCount = 0;
-#ifdef DEBUGGING_SUPPORT
if (opts.compDbgInfo)
-#endif
{
/* Get hold of the line# records, if there are any */
@@ -3661,12 +3724,9 @@ void Compiler::compInitDebuggingInfo()
void Compiler::compSetOptimizationLevel()
{
- unsigned compileFlags;
bool theMinOptsValue;
unsigned jitMinOpts;
- compileFlags = opts.eeFlags;
-
if (compIsForInlining())
{
theMinOptsValue = impInlineInfo->InlinerCompiler->opts.MinOpts();
@@ -3757,13 +3817,40 @@ void Compiler::compSetOptimizationLevel()
}
}
+#if 0
+ // The code in this #if can be used to debug optimization issues according to method hash.
+ // To use, uncomment, rebuild and set environment variables minoptshashlo and minoptshashhi.
+#ifdef DEBUG
+    unsigned methHash = info.compMethodHash();
+    char* lostr = getenv("minoptshashlo");
+    unsigned methHashLo = 0;
+    if (lostr != nullptr)
+    {
+        sscanf_s(lostr, "%x", &methHashLo);
+    }
+    char* histr = getenv("minoptshashhi");
+    unsigned methHashHi = UINT32_MAX;
+    if (histr != nullptr)
+    {
+        sscanf_s(histr, "%x", &methHashHi);
+    }
+    // Check the range unconditionally (not only when both variables are set),
+    // so that setting just one bound behaves as the defaults above suggest.
+    if (methHash >= methHashLo && methHash <= methHashHi)
+    {
+        printf("MinOpts for method %s, hash = 0x%x.\n",
+               info.compFullName, info.compMethodHash());
+        printf(""); // in our logic this causes a flush
+        theMinOptsValue = true;
+    }
+#endif
+#endif
+
if (compStressCompile(STRESS_MIN_OPTS, 5))
{
theMinOptsValue = true;
}
// For PREJIT we never drop down to MinOpts
    // unless CLFLG_MINOPT is set
- else if (!(compileFlags & CORJIT_FLG_PREJIT))
+ else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if ((unsigned)JitConfig.JitMinOptsCodeSize() < info.compILCodeSize)
{
@@ -3805,7 +3892,7 @@ void Compiler::compSetOptimizationLevel()
// Retail check if we should force Minopts due to the complexity of the method
// For PREJIT we never drop down to MinOpts
    // unless CLFLG_MINOPT is set
- if (!theMinOptsValue && !(compileFlags & CORJIT_FLG_PREJIT) &&
+ if (!theMinOptsValue && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) &&
((DEFAULT_MIN_OPTS_CODE_SIZE < info.compILCodeSize) || (DEFAULT_MIN_OPTS_INSTR_COUNT < opts.instrCount) ||
(DEFAULT_MIN_OPTS_BB_COUNT < fgBBcount) || (DEFAULT_MIN_OPTS_LV_NUM_COUNT < lvaCount) ||
(DEFAULT_MIN_OPTS_LV_REF_COUNT < opts.lvRefCount)))
@@ -3828,14 +3915,14 @@ void Compiler::compSetOptimizationLevel()
unsigned methHash = info.compMethodHash();
char* lostr = getenv("opthashlo");
unsigned methHashLo = 0;
- if (lostr != NULL)
+ if (lostr != NULL)
{
sscanf_s(lostr, "%x", &methHashLo);
// methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
}
char* histr = getenv("opthashhi");
unsigned methHashHi = UINT32_MAX;
- if (histr != NULL)
+ if (histr != NULL)
{
sscanf_s(histr, "%x", &methHashHi);
// methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
@@ -3883,27 +3970,27 @@ _SetMinOpts:
}
#if !defined(_TARGET_AMD64_)
- // The VM sets CORJIT_FLG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or
+ // The VM sets JitFlags::JIT_FLAG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or
// (2) the function is marked "noinline". The reason for #2 is that people mark functions
    // noinline to ensure they show up in a stack walk. But for AMD64, we don't need a frame
// pointer for the frame to show up in stack walk.
- if (compileFlags & CORJIT_FLG_FRAMED)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_FRAMED))
codeGen->setFrameRequired(true);
#endif
- if (compileFlags & CORJIT_FLG_RELOC)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC))
{
codeGen->genAlignLoops = false; // loop alignment not supported for prejitted code
- // The zapper doesn't set CORJIT_FLG_ALIGN_LOOPS, and there is
+ // The zapper doesn't set JitFlags::JIT_FLAG_ALIGN_LOOPS, and there is
// no reason for it to set it as the JIT doesn't currently support loop alignment
// for prejitted images. (The JIT doesn't know the final address of the code, hence
// it can't align code based on unknown addresses.)
- assert((compileFlags & CORJIT_FLG_ALIGN_LOOPS) == 0);
+ assert(!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALIGN_LOOPS));
}
else
{
- codeGen->genAlignLoops = (compileFlags & CORJIT_FLG_ALIGN_LOOPS) != 0;
+ codeGen->genAlignLoops = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALIGN_LOOPS);
}
}
@@ -4075,7 +4162,7 @@ void Compiler::compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, b
// For an overview of the structure of the JIT, see:
// https://github.com/dotnet/coreclr/blob/master/Documentation/botr/ryujit-overview.md
//
-void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags)
+void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags* compileFlags)
{
if (compIsForInlining())
{
@@ -4112,26 +4199,36 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
fgRemovePreds();
}
+ EndPhase(PHASE_IMPORTATION);
+
if (compIsForInlining())
{
/* Quit inlining if fgImport() failed for any reason. */
- if (compDonotInline())
+ if (!compDonotInline())
{
- return;
+ /* Filter out unimported BBs */
+
+ fgRemoveEmptyBlocks();
}
- /* Filter out unimported BBs */
+ EndPhase(PHASE_POST_IMPORT);
- fgRemoveEmptyBlocks();
+#ifdef FEATURE_JIT_METHOD_PERF
+ if (pCompJitTimer != nullptr)
+ {
+#if MEASURE_CLRAPI_CALLS
+ EndPhase(PHASE_CLR_API);
+#endif
+ pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, false);
+ }
+#endif
return;
}
assert(!compDonotInline());
- EndPhase(PHASE_IMPORTATION);
-
// Maybe the caller was not interested in generating code
if (compIsForImportOnly())
{
@@ -4145,7 +4242,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
fgRemoveEH();
#endif // !FEATURE_EH
- if (compileFlags->corJitFlags & CORJIT_FLG_BBINSTR)
+ if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
{
fgInstrumentMethod();
}
@@ -4180,7 +4277,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
/* Massage the trees so that we can generate code out of them */
fgMorph();
- EndPhase(PHASE_MORPH);
+ EndPhase(PHASE_MORPH_END);
/* GS security checks for unsafe buffers */
if (getNeedsGSSecurityCookie())
@@ -4336,6 +4433,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
bool doCopyProp = true;
bool doAssertionProp = true;
bool doRangeAnalysis = true;
+ int iterations = 1;
#ifdef DEBUG
doSsa = (JitConfig.JitDoSsa() != 0);
@@ -4345,72 +4443,88 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0);
doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0);
doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0);
-#endif
- if (doSsa)
+ if (opts.optRepeat)
{
- fgSsaBuild();
- EndPhase(PHASE_BUILD_SSA);
+ iterations = JitConfig.JitOptRepeatCount();
}
+#endif
- if (doEarlyProp)
+ while (iterations > 0)
{
- /* Propagate array length and rewrite getType() method call */
- optEarlyProp();
- EndPhase(PHASE_EARLY_PROP);
- }
+ if (doSsa)
+ {
+ fgSsaBuild();
+ EndPhase(PHASE_BUILD_SSA);
+ }
- if (doValueNum)
- {
- fgValueNumber();
- EndPhase(PHASE_VALUE_NUMBER);
- }
+ if (doEarlyProp)
+ {
+ /* Propagate array length and rewrite getType() method call */
+ optEarlyProp();
+ EndPhase(PHASE_EARLY_PROP);
+ }
- if (doLoopHoisting)
- {
- /* Hoist invariant code out of loops */
- optHoistLoopCode();
- EndPhase(PHASE_HOIST_LOOP_CODE);
- }
+ if (doValueNum)
+ {
+ fgValueNumber();
+ EndPhase(PHASE_VALUE_NUMBER);
+ }
- if (doCopyProp)
- {
- /* Perform VN based copy propagation */
- optVnCopyProp();
- EndPhase(PHASE_VN_COPY_PROP);
- }
+ if (doLoopHoisting)
+ {
+ /* Hoist invariant code out of loops */
+ optHoistLoopCode();
+ EndPhase(PHASE_HOIST_LOOP_CODE);
+ }
+
+ if (doCopyProp)
+ {
+ /* Perform VN based copy propagation */
+ optVnCopyProp();
+ EndPhase(PHASE_VN_COPY_PROP);
+ }
#if FEATURE_ANYCSE
- /* Remove common sub-expressions */
- optOptimizeCSEs();
+ /* Remove common sub-expressions */
+ optOptimizeCSEs();
#endif // FEATURE_ANYCSE
#if ASSERTION_PROP
- if (doAssertionProp)
- {
- /* Assertion propagation */
- optAssertionPropMain();
- EndPhase(PHASE_ASSERTION_PROP_MAIN);
- }
+ if (doAssertionProp)
+ {
+ /* Assertion propagation */
+ optAssertionPropMain();
+ EndPhase(PHASE_ASSERTION_PROP_MAIN);
+ }
- if (doRangeAnalysis)
- {
- /* Optimize array index range checks */
- RangeCheck rc(this);
- rc.OptimizeRangeChecks();
- EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS);
- }
+ if (doRangeAnalysis)
+ {
+ /* Optimize array index range checks */
+ RangeCheck rc(this);
+ rc.OptimizeRangeChecks();
+ EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS);
+ }
#endif // ASSERTION_PROP
- /* update the flowgraph if we modified it during the optimization phase*/
- if (fgModified)
- {
- fgUpdateFlowGraph();
- EndPhase(PHASE_UPDATE_FLOW_GRAPH);
+        /* Update the flowgraph if we modified it during the optimization phase */
+ if (fgModified)
+ {
+ fgUpdateFlowGraph();
+ EndPhase(PHASE_UPDATE_FLOW_GRAPH);
+
+ // Recompute the edge weight if we have modified the flow graph
+ fgComputeEdgeWeights();
+ EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS2);
+ }
- // Recompute the edge weight if we have modified the flow graph
- fgComputeEdgeWeights();
- EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS2);
+ // Iterate if requested, resetting annotations first.
+ if (--iterations == 0)
+ {
+ break;
+ }
+ ResetOptAnnotations();
+ RecomputeLoopInfo();
}
}
@@ -4540,7 +4654,12 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
#ifdef FEATURE_JIT_METHOD_PERF
if (pCompJitTimer)
- pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary);
+ {
+#if MEASURE_CLRAPI_CALLS
+ EndPhase(PHASE_CLR_API);
+#endif
+ pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, true);
+ }
#endif
RecordStateAtEndOfCompilation();
@@ -4569,6 +4688,82 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
#endif // FUNC_INFO_LOGGING
}
+//------------------------------------------------------------------------
+// ResetOptAnnotations: Clear annotations produced during global optimizations.
+//
+// Notes:
+// The intent of this method is to clear any information typically assumed
+// to be set only once; it is used between iterations when JitOptRepeat is
+// in effect.
+
+void Compiler::ResetOptAnnotations()
+{
+ assert(opts.optRepeat);
+ assert(JitConfig.JitOptRepeatCount() > 0);
+ fgResetForSsa();
+ vnStore = nullptr;
+ m_opAsgnVarDefSsaNums = nullptr;
+ m_blockToEHPreds = nullptr;
+ fgSsaPassesCompleted = 0;
+ fgVNPassesCompleted = 0;
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
+ {
+ stmt->gtFlags &= ~GTF_STMT_HAS_CSE;
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext)
+ {
+ tree->ClearVN();
+ tree->ClearAssertion();
+ tree->gtCSEnum = NO_CSE;
+
+ // Clear any *_ASG_LHS flags -- these are set during SSA construction,
+ // and the heap live-in calculation depends on them being unset coming
+ // into SSA construction (without clearing them, a block that has a
+ // heap def via one of these before any heap use is treated as not having
+ // an upwards-exposed heap use, even though subsequent heap uses may not
+ // be killed by the store; this seems to be a bug, worked around here).
+ if (tree->OperIsIndir())
+ {
+ tree->gtFlags &= ~GTF_IND_ASG_LHS;
+ }
+ else if (tree->OperGet() == GT_CLS_VAR)
+ {
+ tree->gtFlags &= ~GTF_CLS_VAR_ASG_LHS;
+ }
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// RecomputeLoopInfo: Recompute loop annotations between opt-repeat iterations.
+//
+// Notes:
+// The intent of this method is to update loop structure annotations, and those
+// they depend on; these annotations may have become stale during optimization,
+// and need to be up-to-date before running another iteration of optimizations.
+
+void Compiler::RecomputeLoopInfo()
+{
+ assert(opts.optRepeat);
+ assert(JitConfig.JitOptRepeatCount() > 0);
+ // Recompute reachability sets, dominators, and loops.
+ optLoopCount = 0;
+ fgDomsComputed = false;
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ block->bbFlags &= ~BBF_LOOP_FLAGS;
+ }
+ fgComputeReachability();
+ // Rebuild the loop tree annotations themselves. Since this is performed as
+ // part of 'optOptimizeLoops', this will also re-perform loop rotation, but
+ // not other optimizations, as the others are not part of 'optOptimizeLoops'.
+ optOptimizeLoops();
+}
+
/*****************************************************************************/
void Compiler::ProcessShutdownWork(ICorStaticInfo* statInfo)
{
@@ -4696,11 +4891,13 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags)
+ JitFlags* compileFlags)
{
#ifdef FEATURE_JIT_METHOD_PERF
static bool checkedForJitTimeLog = false;
+ pCompJitTimer = nullptr;
+
if (!checkedForJitTimeLog)
{
// Call into VM to get the config strings. FEATURE_JIT_METHOD_PERF is enabled for
@@ -4713,14 +4910,10 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
checkedForJitTimeLog = true;
}
- if ((Compiler::compJitTimeLogFilename != NULL) || (JitTimeLogCsv() != NULL))
+ if ((Compiler::compJitTimeLogFilename != nullptr) || (JitTimeLogCsv() != nullptr))
{
pCompJitTimer = JitTimer::Create(this, methodInfo->ILCodeSize);
}
- else
- {
- pCompJitTimer = NULL;
- }
#endif // FEATURE_JIT_METHOD_PERF
#ifdef DEBUG
@@ -4862,7 +5055,7 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
// Set this before the first 'BADCODE'
// Skip verification where possible
- tiVerificationNeeded = (compileFlags->corJitFlags & CORJIT_FLG_SKIP_VERIFICATION) == 0;
+ tiVerificationNeeded = !compileFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION);
assert(!compIsForInlining() || !tiVerificationNeeded); // Inlinees must have been verified.
@@ -4893,8 +5086,8 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
case CORINFO_VERIFICATION_CAN_SKIP:
// The VM should first verify the open instantiation. If unverifiable code
- // is detected, it should pass in CORJIT_FLG_SKIP_VERIFICATION.
- assert(!"The VM should have used CORJIT_FLG_SKIP_VERIFICATION");
+ // is detected, it should pass in JitFlags::JIT_FLAG_SKIP_VERIFICATION.
+ assert(!"The VM should have used JitFlags::JIT_FLAG_SKIP_VERIFICATION");
tiVerificationNeeded = false;
break;
@@ -4933,7 +5126,7 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
CORINFO_METHOD_INFO* methodInfo;
void** methodCodePtr;
ULONG* methodCodeSize;
- CORJIT_FLAGS* compileFlags;
+ JitFlags* compileFlags;
CorInfoInstantiationVerification instVerInfo;
int result;
@@ -5000,6 +5193,8 @@ void Compiler::compCompileFinish()
// Make the updates.
genMemStats.nraTotalSizeAlloc = compGetAllocator()->getTotalBytesAllocated();
genMemStats.nraTotalSizeUsed = compGetAllocator()->getTotalBytesUsed();
+ memAllocHist.record((unsigned)((genMemStats.nraTotalSizeAlloc + 1023) / 1024));
+ memUsedHist.record((unsigned)((genMemStats.nraTotalSizeUsed + 1023) / 1024));
s_aggMemStats.Add(genMemStats);
if (genMemStats.allocSz > s_maxCompMemStats.allocSz)
{
@@ -5038,6 +5233,7 @@ void Compiler::compCompileFinish()
// the prolog which requires memory
(info.compLocalsCount <= 32) && (!opts.MinOpts()) && // We may have too many local variables, etc
(getJitStressLevel() == 0) && // We need extra memory for stress
+ !opts.optRepeat && // We need extra memory to repeat opts
!compAllocator->bypassHostAllocator() && // ArenaAllocator::getDefaultPageSize() is artificially low for
// DirectAlloc
(compAllocator->getTotalBytesAllocated() > (2 * ArenaAllocator::getDefaultPageSize())) &&
@@ -5071,7 +5267,7 @@ void Compiler::compCompileFinish()
mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
unsigned profCallCount = 0;
- if (((opts.eeFlags & CORJIT_FLG_BBOPT) != 0) && fgHaveProfileData())
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData())
{
assert(fgProfileBuffer[0].ILOffset == 0);
profCallCount = fgProfileBuffer[0].ExecutionCount;
@@ -5208,7 +5404,7 @@ void Compiler::compCompileFinish()
// For ngen the int3 or breakpoint instruction will be right at the
// start of the ngen method and we will stop when we execute it.
//
- if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0)
+ if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if (compJitHaltMethod())
{
@@ -5296,7 +5492,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags,
+ JitFlags* compileFlags,
CorInfoInstantiationVerification instVerInfo)
{
CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd;
@@ -5438,7 +5634,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
info.compIsContextful = (info.compClassAttr & CORINFO_FLG_CONTEXTFUL) != 0;
- info.compPublishStubParam = (opts.eeFlags & CORJIT_FLG_PUBLISH_SECRET_PARAM) != 0;
+ info.compPublishStubParam = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM);
switch (methodInfo->args.getCallConv())
{
@@ -5476,7 +5672,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE);
- if (!compIsForInlining() && (opts.eeFlags & CORJIT_FLG_PREJIT))
+ if (!compIsForInlining() && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
// We're prejitting the root method. We also will analyze it as
// a potential inline candidate.
@@ -5644,10 +5840,6 @@ _Next:
return CORJIT_OK;
}
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************/
-
//------------------------------------------------------------------------
// compFindLocalVarLinear: Linear search for variable's scope containing offset.
//
@@ -5992,11 +6184,7 @@ void Compiler::compProcessScopesUntil(unsigned offset,
} while (foundExit || foundEnter);
}
-/*****************************************************************************/
-#endif // DEBUGGING_SUPPORT
-/*****************************************************************************/
-
-#if defined(DEBUGGING_SUPPORT) && defined(DEBUG)
+#if defined(DEBUG)
void Compiler::compDispScopeLists()
{
@@ -6044,10 +6232,6 @@ void Compiler::compDispScopeLists()
}
}
-#endif
-
-#if defined(DEBUG)
-
void Compiler::compDispLocalVars()
{
printf("info.compVarScopesCount = %d\n", info.compVarScopesCount);
@@ -6066,7 +6250,66 @@ void Compiler::compDispLocalVars()
}
}
-#endif
+#endif // DEBUG
+
+/*****************************************************************************/
+
+#if MEASURE_CLRAPI_CALLS
+
+struct WrapICorJitInfo : public ICorJitInfo
+{
+ //------------------------------------------------------------------------
+ // WrapICorJitInfo::makeOne: allocate an instance of WrapICorJitInfo
+ //
+ // Arguments:
+ // alloc - the allocator to get memory from for the instance
+    //    compiler   - the compiler instance
+ // compHndRef - the ICorJitInfo handle from the EE; the caller's
+ // copy may be replaced with a "wrapper" instance
+ //
+ // Return Value:
+ // If the config flags indicate that ICorJitInfo should be wrapped,
+ // we return the "wrapper" instance; otherwise we return "nullptr".
+
+ static WrapICorJitInfo* makeOne(ArenaAllocator* alloc, Compiler* compiler, COMP_HANDLE& compHndRef /* INOUT */)
+ {
+ WrapICorJitInfo* wrap = nullptr;
+
+ if (JitConfig.JitEECallTimingInfo() != 0)
+ {
+ // It's too early to use the default allocator, so we do this
+ // in two steps to be safe (the constructor doesn't need to do
+ // anything except fill in the vtable pointer, so we let the
+ // compiler do it).
+ void* inst = alloc->allocateMemory(roundUp(sizeof(WrapICorJitInfo)));
+ if (inst != nullptr)
+ {
+ // If you get a build error here due to 'WrapICorJitInfo' being
+ // an abstract class, it's very likely that the wrapper bodies
+ // in ICorJitInfo_API_wrapper.hpp are no longer in sync with
+ // the EE interface; please be kind and update the header file.
+ wrap = new (inst, jitstd::placement_t()) WrapICorJitInfo();
+
+ wrap->wrapComp = compiler;
+
+ // Save the real handle and replace it with our wrapped version.
+ wrap->wrapHnd = compHndRef;
+ compHndRef = wrap;
+ }
+ }
+
+ return wrap;
+ }
+
+private:
+ Compiler* wrapComp;
+ COMP_HANDLE wrapHnd; // the "real thing"
+
+public:
+#include "ICorJitInfo_API_wrapper.hpp"
+};
+
+#endif // MEASURE_CLRAPI_CALLS
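// Illustrative sketch (not part of this change): each override generated by
// ICorJitInfo_API_wrapper.hpp conceptually times the forwarded EE call and
// charges it to the CLR-API accounting. getMethodAttribs is a real
// ICorJitInfo entry point, but the accounting helper named below is a
// hypothetical stand-in for what the generated wrapper bodies do.
//
//     DWORD WrapICorJitInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn)
//     {
//         unsigned __int64 start;
//         unsigned __int64 stop;
//         _our_GetThreadCycles(&start);
//         DWORD result = wrapHnd->getMethodAttribs(ftn); // forward to the real EE interface
//         _our_GetThreadCycles(&stop);
//         RecordClrApiCall(wrapComp, stop - start);      // hypothetical accounting hook
//         return result;
//     }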
/*****************************************************************************/
@@ -6078,7 +6321,7 @@ int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags,
+ JitFlags* compileFlags,
void* inlineInfoPtr)
{
//
@@ -6093,6 +6336,10 @@ START:
ArenaAllocator* pAlloc = nullptr;
ArenaAllocator alloc;
+#if MEASURE_CLRAPI_CALLS
+ WrapICorJitInfo* wrapCLR = nullptr;
+#endif
+
if (inlineInfo)
{
// Use inliner's memory allocator when compiling the inlinee.
@@ -6128,8 +6375,11 @@ START:
CORINFO_METHOD_INFO* methodInfo;
void** methodCodePtr;
ULONG* methodCodeSize;
- CORJIT_FLAGS* compileFlags;
+ JitFlags* compileFlags;
InlineInfo* inlineInfo;
+#if MEASURE_CLRAPI_CALLS
+ WrapICorJitInfo* wrapCLR;
+#endif
int result;
} param;
@@ -6145,7 +6395,10 @@ START:
param.methodCodeSize = methodCodeSize;
param.compileFlags = compileFlags;
param.inlineInfo = inlineInfo;
- param.result = result;
+#if MEASURE_CLRAPI_CALLS
+ param.wrapCLR = nullptr;
+#endif
+ param.result = result;
setErrorTrap(compHnd, Param*, pParamOuter, &param)
{
@@ -6172,6 +6425,10 @@ START:
pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
}
+#if MEASURE_CLRAPI_CALLS
+ pParam->wrapCLR = WrapICorJitInfo::makeOne(pParam->pAlloc, pParam->pComp, pParam->compHnd);
+#endif
+
// push this compiler on the stack (TLS)
pParam->pComp->prevCompiler = JitTls::GetCompiler();
JitTls::SetCompiler(pParam->pComp);
@@ -6238,8 +6495,9 @@ START:
jitFallbackCompile = true;
// Update the flags for 'safer' code generation.
- compileFlags->corJitFlags |= CORJIT_FLG_MIN_OPT;
- compileFlags->corJitFlags &= ~(CORJIT_FLG_SIZE_OPT | CORJIT_FLG_SPEED_OPT);
+ compileFlags->Set(JitFlags::JIT_FLAG_MIN_OPT);
+ compileFlags->Clear(JitFlags::JIT_FLAG_SIZE_OPT);
+ compileFlags->Clear(JitFlags::JIT_FLAG_SPEED_OPT);
goto START;
}
@@ -6952,9 +7210,12 @@ void Compiler::compDispCallArgStats(FILE* fout)
// Static variables
CritSecObject CompTimeSummaryInfo::s_compTimeSummaryLock;
CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary;
+#if MEASURE_CLRAPI_CALLS
+double JitTimer::s_cyclesPerSec = CycleTimer::CyclesPerSecond();
+#endif
#endif // FEATURE_JIT_METHOD_PERF
-#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS
+#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS || defined(FEATURE_TRACELOGGING)
const char* PhaseNames[] = {
#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) string_nm,
#include "compphases.h"
@@ -6983,13 +7244,36 @@ int PhaseParent[] = {
};
CompTimeInfo::CompTimeInfo(unsigned byteCodeBytes)
- : m_byteCodeBytes(byteCodeBytes), m_totalCycles(0), m_parentPhaseEndSlop(0), m_timerFailure(false)
+ : m_byteCodeBytes(byteCodeBytes)
+ , m_totalCycles(0)
+ , m_parentPhaseEndSlop(0)
+ , m_timerFailure(false)
+#if MEASURE_CLRAPI_CALLS
+ , m_allClrAPIcalls(0)
+ , m_allClrAPIcycles(0)
+#endif
{
for (int i = 0; i < PHASE_NUMBER_OF; i++)
{
m_invokesByPhase[i] = 0;
m_cyclesByPhase[i] = 0;
+#if MEASURE_CLRAPI_CALLS
+ m_CLRinvokesByPhase[i] = 0;
+ m_CLRcyclesByPhase[i] = 0;
+#endif
}
+
+#if MEASURE_CLRAPI_CALLS
+ assert(ARRAYSIZE(m_perClrAPIcalls) == API_ICorJitInfo_Names::API_COUNT);
+ assert(ARRAYSIZE(m_perClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT);
+ assert(ARRAYSIZE(m_maxClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT);
+ for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
+ {
+ m_perClrAPIcalls[i] = 0;
+ m_perClrAPIcycles[i] = 0;
+ m_maxClrAPIcycles[i] = 0;
+ }
+#endif
}
bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info)
@@ -6997,52 +7281,125 @@ bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info)
return false; // info.m_byteCodeBytes < 10;
}
-void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info)
+//------------------------------------------------------------------------
+// CompTimeSummaryInfo::AddInfo: Record timing info from one compile.
+//
+// Arguments:
+// info - The timing information to record.
+// includePhases - If "true", the per-phase info in "info" is valid,
+// which means that a "normal" compile has ended; if
+// the value is "false" we are recording the results
+// of a partial compile (typically an import-only run
+// on behalf of the inliner) in which case the phase
+// info is not valid and so we only record EE call
+// overhead.
+void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info, bool includePhases)
{
if (info.m_timerFailure)
+ {
return; // Don't update if there was a failure.
+ }
CritSecHolder timeLock(s_compTimeSummaryLock);
- m_numMethods++;
- bool includeInFiltered = IncludedInFilteredData(info);
+ if (includePhases)
+ {
+ bool includeInFiltered = IncludedInFilteredData(info);
- // Update the totals and maxima.
- m_total.m_byteCodeBytes += info.m_byteCodeBytes;
- m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes);
- m_total.m_totalCycles += info.m_totalCycles;
- m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles);
+ m_numMethods++;
- if (includeInFiltered)
- {
- m_numFilteredMethods++;
- m_filtered.m_byteCodeBytes += info.m_byteCodeBytes;
- m_filtered.m_totalCycles += info.m_totalCycles;
- m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
- }
+ // Update the totals and maxima.
+ m_total.m_byteCodeBytes += info.m_byteCodeBytes;
+ m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes);
+ m_total.m_totalCycles += info.m_totalCycles;
+ m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles);
+
+#if MEASURE_CLRAPI_CALLS
+ // Update the CLR-API values.
+ m_total.m_allClrAPIcalls += info.m_allClrAPIcalls;
+ m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls);
+ m_total.m_allClrAPIcycles += info.m_allClrAPIcycles;
+ m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles);
+#endif
- for (int i = 0; i < PHASE_NUMBER_OF; i++)
- {
- m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i];
- m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
if (includeInFiltered)
{
- m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i];
- m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+ m_numFilteredMethods++;
+ m_filtered.m_byteCodeBytes += info.m_byteCodeBytes;
+ m_filtered.m_totalCycles += info.m_totalCycles;
+ m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
+ }
+
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i];
+ m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+
+#if MEASURE_CLRAPI_CALLS
+ m_total.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i];
+ m_total.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i];
+#endif
+
+ if (includeInFiltered)
+ {
+ m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i];
+ m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+#if MEASURE_CLRAPI_CALLS
+ m_filtered.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i];
+ m_filtered.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i];
+#endif
+ }
+ m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]);
+
+#if MEASURE_CLRAPI_CALLS
+ m_maximum.m_CLRcyclesByPhase[i] = max(m_maximum.m_CLRcyclesByPhase[i], info.m_CLRcyclesByPhase[i]);
+#endif
}
- m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]);
+ m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
+ m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop);
+ }
+#if MEASURE_CLRAPI_CALLS
+ else
+ {
+ m_totMethods++;
+
+ // Update the "global" CLR-API values.
+ m_total.m_allClrAPIcalls += info.m_allClrAPIcalls;
+ m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls);
+ m_total.m_allClrAPIcycles += info.m_allClrAPIcycles;
+ m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles);
+
+ // Update the per-phase CLR-API values.
+ m_total.m_invokesByPhase[PHASE_CLR_API] += info.m_allClrAPIcalls;
+ m_maximum.m_invokesByPhase[PHASE_CLR_API] =
+            max(m_maximum.m_invokesByPhase[PHASE_CLR_API], info.m_allClrAPIcalls);
+ m_total.m_cyclesByPhase[PHASE_CLR_API] += info.m_allClrAPIcycles;
+ m_maximum.m_cyclesByPhase[PHASE_CLR_API] =
+ max(m_maximum.m_cyclesByPhase[PHASE_CLR_API], info.m_allClrAPIcycles);
+ }
+
+ for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
+ {
+ m_total.m_perClrAPIcalls[i] += info.m_perClrAPIcalls[i];
+ m_maximum.m_perClrAPIcalls[i] = max(m_maximum.m_perClrAPIcalls[i], info.m_perClrAPIcalls[i]);
+
+ m_total.m_perClrAPIcycles[i] += info.m_perClrAPIcycles[i];
+ m_maximum.m_perClrAPIcycles[i] = max(m_maximum.m_perClrAPIcycles[i], info.m_perClrAPIcycles[i]);
+
+ m_maximum.m_maxClrAPIcycles[i] = max(m_maximum.m_maxClrAPIcycles[i], info.m_maxClrAPIcycles[i]);
}
- m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
- m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop);
+#endif
}
// Static
-LPCWSTR Compiler::compJitTimeLogFilename = NULL;
+LPCWSTR Compiler::compJitTimeLogFilename = nullptr;
void CompTimeSummaryInfo::Print(FILE* f)
{
- if (f == NULL)
+ if (f == nullptr)
+ {
return;
+ }
// Otherwise...
double countsPerSec = CycleTimer::CyclesPerSecond();
if (countsPerSec == 0.0)
@@ -7051,13 +7408,16 @@ void CompTimeSummaryInfo::Print(FILE* f)
return;
}
+ bool extraInfo = (JitConfig.JitEECallTimingInfo() != 0);
+ double totTime_ms = 0.0;
+
fprintf(f, "JIT Compilation time report:\n");
fprintf(f, " Compiled %d methods.\n", m_numMethods);
if (m_numMethods != 0)
{
fprintf(f, " Compiled %d bytecodes total (%d max, %8.2f avg).\n", m_total.m_byteCodeBytes,
m_maximum.m_byteCodeBytes, (double)m_total.m_byteCodeBytes / (double)m_numMethods);
- double totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0;
+ totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0;
fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_total.m_totalCycles / 1000000.0),
totTime_ms);
fprintf(f, " max: %10.3f Mcycles/%10.3f ms\n", ((double)m_maximum.m_totalCycles) / 1000000.0,
@@ -7065,15 +7425,36 @@ void CompTimeSummaryInfo::Print(FILE* f)
fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
((double)m_total.m_totalCycles) / 1000000.0 / (double)m_numMethods, totTime_ms / (double)m_numMethods);
- fprintf(f, " Total time by phases:\n");
- fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)\n");
- fprintf(f, " --------------------------------------------------------------------------------------\n");
+ const char* extraHdr1 = "";
+ const char* extraHdr2 = "";
+#if MEASURE_CLRAPI_CALLS
+ if (extraInfo)
+ {
+ extraHdr1 = " CLRs/meth % in CLR";
+ extraHdr2 = "-----------------------";
+ }
+#endif
+
+ fprintf(f, "\n Total time by phases:\n");
+ fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)%s\n",
+ extraHdr1);
+ fprintf(f, " ---------------------------------------------------------------------------------------%s\n",
+ extraHdr2);
+
// Ensure that at least the names array and the Phases enum have the same number of entries:
assert(sizeof(PhaseNames) / sizeof(const char*) == PHASE_NUMBER_OF);
for (int i = 0; i < PHASE_NUMBER_OF; i++)
{
- double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
- double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double phase_tot_pct = 100.0 * phase_tot_ms / totTime_ms;
+
+#if MEASURE_CLRAPI_CALLS
+ // Skip showing CLR API call info if we didn't collect any
+ if (i == PHASE_CLR_API && !extraInfo)
+ continue;
+#endif
+
// Indent nested phases, according to depth.
int ancPhase = PhaseParent[i];
while (ancPhase != -1)
@@ -7081,13 +7462,33 @@ void CompTimeSummaryInfo::Print(FILE* f)
fprintf(f, " ");
ancPhase = PhaseParent[ancPhase];
}
- fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%% %8.3f\n", PhaseNames[i],
+ fprintf(f, " %-30s %6.2f %10.2f %9.3f %8.2f%% %8.3f", PhaseNames[i],
((double)m_total.m_invokesByPhase[i]) / ((double)m_numMethods),
-                    ((double)m_total.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, (phase_tot_ms * 100.0 / totTime_ms),
-                    phase_max_ms);
+                    ((double)m_total.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, phase_tot_pct, phase_max_ms);
+
+#if MEASURE_CLRAPI_CALLS
+ if (extraInfo && i != PHASE_CLR_API)
+ {
+ double nest_tot_ms = (((double)m_total.m_CLRcyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double nest_percent = nest_tot_ms * 100.0 / totTime_ms;
+ double calls_per_fn = ((double)m_total.m_CLRinvokesByPhase[i]) / ((double)m_numMethods);
+
+ if (nest_percent > 0.1 || calls_per_fn > 10)
+ fprintf(f, " %5.1f %8.2f%%", calls_per_fn, nest_percent);
+ }
+#endif
+ fprintf(f, "\n");
+ }
+
+ // Show slop if it's over a certain percentage of the total
+ double pslop_pct = 100.0 * m_total.m_parentPhaseEndSlop * 1000.0 / countsPerSec / totTime_ms;
+ if (pslop_pct >= 1.0)
+ {
+ fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
+ "%3.1f%% of total.\n\n",
+ m_total.m_parentPhaseEndSlop / 1000000.0, pslop_pct);
}
- fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
- m_total.m_parentPhaseEndSlop);
}
if (m_numFilteredMethods > 0)
{
@@ -7121,19 +7522,125 @@ void CompTimeSummaryInfo::Print(FILE* f)
((double)m_filtered.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms,
(phase_tot_ms * 100.0 / totTime_ms));
}
- fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
- m_filtered.m_parentPhaseEndSlop);
+
+ double fslop_ms = m_filtered.m_parentPhaseEndSlop * 1000.0 / countsPerSec;
+ if (fslop_ms > 1.0)
+ {
+ fprintf(f,
+ "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
+                    m_filtered.m_parentPhaseEndSlop / 1000000.0);
+ }
}
+
+#if MEASURE_CLRAPI_CALLS
+ if (m_total.m_allClrAPIcalls > 0 && m_total.m_allClrAPIcycles > 0)
+ {
+ fprintf(f, "\n");
+ if (m_totMethods > 0)
+ fprintf(f, " Imported %u methods.\n\n", m_numMethods + m_totMethods);
+
+ fprintf(f, " CLR API # calls total time max time avg time %% "
+ "of total\n");
+ fprintf(f, " -------------------------------------------------------------------------------");
+ fprintf(f, "---------------------\n");
+
+ static const char* APInames[] = {
+#define DEF_CLR_API(name) #name,
+#include "ICorJitInfo_API_names.h"
+ };
+
+ unsigned shownCalls = 0;
+ double shownMillis = 0.0;
+#ifdef DEBUG
+ unsigned checkedCalls = 0;
+ double checkedMillis = 0.0;
+#endif
+
+ for (unsigned pass = 0; pass < 2; pass++)
+ {
+ for (unsigned i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
+ {
+ unsigned calls = m_total.m_perClrAPIcalls[i];
+ if (calls == 0)
+ continue;
+
+ unsigned __int64 cycles = m_total.m_perClrAPIcycles[i];
+ double millis = 1000.0 * cycles / countsPerSec;
+
+ // Don't show the small fry to keep the results manageable
+ if (millis < 0.5)
+ {
+ // We always show the following API because it is always called
+ // exactly once for each method and its body is the simplest one
+ // possible (it just returns an integer constant), and therefore
+ // it can be used to measure the overhead of adding the CLR API
+ // timing code. Roughly speaking, on a 3GHz x64 box the overhead
+ // per call should be around 40 ns when using RDTSC, compared to
+ // about 140 ns when using GetThreadCycles() under Windows.
+ if (i != API_ICorJitInfo_Names::API_getExpectedTargetArchitecture)
+ continue;
+ }
+
+ // In the first pass we just compute the totals.
+ if (pass == 0)
+ {
+ shownCalls += m_total.m_perClrAPIcalls[i];
+ shownMillis += millis;
+ continue;
+ }
+
+ unsigned __int32 maxcyc = m_maximum.m_maxClrAPIcycles[i];
+ double max_ms = 1000.0 * maxcyc / countsPerSec;
+
+ fprintf(f, " %-40s", APInames[i]); // API name
+ fprintf(f, " %8u %9.1f ms", calls, millis); // #calls, total time
+ fprintf(f, " %8.1f ms %8.1f ns", max_ms, 1000000.0 * millis / calls); // max, avg time
+ fprintf(f, " %5.1f%%\n", 100.0 * millis / shownMillis); // % of total
+
+#ifdef DEBUG
+ checkedCalls += m_total.m_perClrAPIcalls[i];
+ checkedMillis += millis;
+#endif
+ }
+ }
+
+#ifdef DEBUG
+ assert(checkedCalls == shownCalls);
+ assert(checkedMillis == shownMillis);
+#endif
+
+ if (shownCalls > 0 || shownMillis > 0)
+ {
+ fprintf(f, " -------------------------");
+ fprintf(f, "---------------------------------------------------------------------------\n");
+ fprintf(f, " Total for calls shown above %8u %10.1f ms", shownCalls, shownMillis);
+ if (totTime_ms > 0.0)
+ fprintf(f, " (%4.1lf%% of overall JIT time)", shownMillis * 100.0 / totTime_ms);
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n");
+ }
+#endif
+
+ fprintf(f, "\n");
}
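The CLR-API table above is built in two passes: pass 0 only accumulates shownCalls/shownMillis over the rows that survive the 0.5 ms cutoff, and pass 1 prints each surviving row's share against that precomputed denominator. A minimal standalone sketch of the same pattern, with hypothetical names (rowsMs stands in for the per-API millisecond totals):

// Two-pass report: compute the denominator first, then print percentages.
// Hypothetical sketch; not the JIT's own types.
#include <cstdio>
#include <vector>

void PrintWithShares(const std::vector<double>& rowsMs, double cutoffMs)
{
    double totalMs = 0.0;
    for (double ms : rowsMs) // pass 0: total of the rows we will show
    {
        if (ms >= cutoffMs)
        {
            totalMs += ms;
        }
    }
    for (double ms : rowsMs) // pass 1: print, now that the denominator is known
    {
        if (ms >= cutoffMs)
        {
            printf("%9.1f ms  %5.1f%%\n", ms, 100.0 * ms / totalMs);
        }
    }
}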
JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize)
{
+#if MEASURE_CLRAPI_CALLS
+ m_CLRcallInvokes = 0;
+ m_CLRcallCycles = 0;
+#endif
+
#ifdef DEBUG
m_lastPhase = (Phases)-1;
+#if MEASURE_CLRAPI_CALLS
+ m_CLRcallAPInum = -1;
+#endif
#endif
unsigned __int64 threadCurCycles;
- if (GetThreadCycles(&threadCurCycles))
+ if (_our_GetThreadCycles(&threadCurCycles))
{
m_start = threadCurCycles;
m_curPhaseStart = threadCurCycles;
@@ -7147,9 +7654,10 @@ void JitTimer::EndPhase(Phases phase)
// assert((int)phase > (int)m_lastPhase); // We should end phases in increasing order.
unsigned __int64 threadCurCycles;
- if (GetThreadCycles(&threadCurCycles))
+ if (_our_GetThreadCycles(&threadCurCycles))
{
unsigned __int64 phaseCycles = (threadCurCycles - m_curPhaseStart);
+
// If this is not a leaf phase, the assumption is that the last subphase must have just recently ended.
// Credit the duration to "slop", the total of which should be very small.
if (PhaseHasChildren[phase])
@@ -7161,6 +7669,13 @@ void JitTimer::EndPhase(Phases phase)
// It is a leaf phase. Credit duration to it.
m_info.m_invokesByPhase[phase]++;
m_info.m_cyclesByPhase[phase] += phaseCycles;
+
+#if MEASURE_CLRAPI_CALLS
+ // Record the CLR API timing info as well.
+ m_info.m_CLRinvokesByPhase[phase] += m_CLRcallInvokes;
+ m_info.m_CLRcyclesByPhase[phase] += m_CLRcallCycles;
+#endif
+
// Credit the phase's ancestors, if any.
int ancPhase = PhaseParent[phase];
while (ancPhase != -1)
@@ -7168,8 +7683,13 @@ void JitTimer::EndPhase(Phases phase)
m_info.m_cyclesByPhase[ancPhase] += phaseCycles;
ancPhase = PhaseParent[ancPhase];
}
- // Did we just end the last phase?
- if (phase + 1 == PHASE_NUMBER_OF)
+
+#if MEASURE_CLRAPI_CALLS
+ const Phases lastPhase = PHASE_CLR_API;
+#else
+ const Phases lastPhase = PHASE_NUMBER_OF;
+#endif
+ if (phase + 1 == lastPhase)
{
m_info.m_totalCycles = (threadCurCycles - m_start);
}
@@ -7179,11 +7699,92 @@ void JitTimer::EndPhase(Phases phase)
}
}
}
+
#ifdef DEBUG
m_lastPhase = phase;
#endif
+#if MEASURE_CLRAPI_CALLS
+ m_CLRcallInvokes = 0;
+ m_CLRcallCycles = 0;
+#endif
+}
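The "slop" bookkeeping in EndPhase is easiest to see with a worked example: when a child phase ends, m_curPhaseStart is reset to the current counter, so by the time the parent's own EndPhase runs only the short gap between the two calls remains, and (because PhaseHasChildren[parent]) that gap is credited to m_parentPhaseEndSlop instead of being double-counted against the parent. A minimal standalone sketch of that accounting, with hypothetical names and assuming the same reset-on-end discipline:

// Hypothetical sketch of the leaf/parent phase accounting used above.
#include <cstdint>

struct PhaseClock
{
    uint64_t curPhaseStart = 0; // reset by every EndPhase call
    uint64_t leafCycles    = 0; // time credited to leaf phases
    uint64_t slopCycles    = 0; // unattributed time at parent-phase ends

    void EndPhase(uint64_t now, bool hasChildren)
    {
        uint64_t elapsed = now - curPhaseStart;
        if (hasChildren)
        {
            slopCycles += elapsed; // children were already credited; this should stay tiny
        }
        else
        {
            leafCycles += elapsed;
        }
        curPhaseStart = now; // the next phase (or the parent's end) measures from here
    }
};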
+
+#if MEASURE_CLRAPI_CALLS
+
+//------------------------------------------------------------------------
+// JitTimer::CLRApiCallEnter: Start the stopwatch for an EE call.
+//
+// Arguments:
+// apix - The API index - an "enum API_ICorJitInfo_Names" value.
+//
+
+void JitTimer::CLRApiCallEnter(unsigned apix)
+{
+ assert(m_CLRcallAPInum == -1); // Nested calls not allowed
+ m_CLRcallAPInum = apix;
+
+ // If we can't get the cycles, we'll just ignore this call
+ if (!_our_GetThreadCycles(&m_CLRcallStart))
+ m_CLRcallStart = 0;
+}
+
+//------------------------------------------------------------------------
+// JitTimer::CLRApiCallLeave: Compute and record time spent in an EE call.
+//
+// Arguments:
+// apix - The API's "enum API_ICorJitInfo_Names" value; this value
+// should match the value passed to the most recent call to
+// "CLRApiCallEnter" (i.e. these must come as matched pairs),
+// and they also may not nest.
+//
+
+void JitTimer::CLRApiCallLeave(unsigned apix)
+{
+    // Make sure we're actually inside a measured CLR call.
+    assert(m_CLRcallAPInum != -1);
+
+ // Ignore this one if we don't have a valid starting counter.
+ if (m_CLRcallStart != 0)
+ {
+ if (JitConfig.JitEECallTimingInfo() != 0)
+ {
+ unsigned __int64 threadCurCycles;
+ if (_our_GetThreadCycles(&threadCurCycles))
+ {
+ // Compute the cycles spent in the call.
+ threadCurCycles -= m_CLRcallStart;
+
+ // Add the cycles to the 'phase' and bump its use count.
+ m_info.m_cyclesByPhase[PHASE_CLR_API] += threadCurCycles;
+ m_info.m_invokesByPhase[PHASE_CLR_API] += 1;
+
+ // Add the values to the "per API" info.
+ m_info.m_allClrAPIcycles += threadCurCycles;
+ m_info.m_allClrAPIcalls += 1;
+
+ m_info.m_perClrAPIcalls[apix] += 1;
+ m_info.m_perClrAPIcycles[apix] += threadCurCycles;
+ m_info.m_maxClrAPIcycles[apix] = max(m_info.m_maxClrAPIcycles[apix], (unsigned __int32)threadCurCycles);
+
+ // Subtract the cycles from the enclosing phase by bumping its start time
+ m_curPhaseStart += threadCurCycles;
+
+ // Update the running totals.
+ m_CLRcallInvokes += 1;
+ m_CLRcallCycles += threadCurCycles;
+ }
+ }
+
+ m_CLRcallStart = 0;
+ }
+
+    m_CLRcallAPInum = -1; // No longer in this API call.
}
+#endif // MEASURE_CLRAPI_CALLS
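Since CLRApiCallEnter/CLRApiCallLeave must arrive in strictly matched, non-nested pairs, a call site can enforce the pairing with a scope guard; the sketch below is hypothetical (the guard type is not part of this change). Note also the m_curPhaseStart += threadCurCycles trick above: it excludes EE-call time from the enclosing phase by pushing that phase's start time forward rather than touching its accumulated total.

// Hypothetical RAII guard: guarantees each CLRApiCallEnter is matched by a
// CLRApiCallLeave with the same API index, even on early returns.
class ClrApiCallGuard
{
    JitTimer* m_timer;
    unsigned  m_apix;

public:
    ClrApiCallGuard(JitTimer* timer, unsigned apix) : m_timer(timer), m_apix(apix)
    {
        if (m_timer != nullptr)
        {
            m_timer->CLRApiCallEnter(m_apix);
        }
    }
    ~ClrApiCallGuard()
    {
        if (m_timer != nullptr)
        {
            m_timer->CLRApiCallLeave(m_apix);
        }
    }
};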
+
CritSecObject JitTimer::s_csvLock;
LPCWSTR Compiler::JitTimeLogCsv()
@@ -7195,39 +7796,38 @@ LPCWSTR Compiler::JitTimeLogCsv()
void JitTimer::PrintCsvHeader()
{
LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
- if (jitTimeLogCsv == NULL)
+ if (jitTimeLogCsv == nullptr)
{
return;
}
CritSecHolder csvLock(s_csvLock);
- FILE* fp = _wfopen(jitTimeLogCsv, W("r"));
- if (fp == nullptr)
+ FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
+ if (fp != nullptr)
{
- // File doesn't exist, so create it and write the header
-
- // Use write mode, so we rewrite the file, and retain only the last compiled process/dll.
- // Ex: ngen install mscorlib won't print stats for "ngen" but for "mscorsvw"
- FILE* fp = _wfopen(jitTimeLogCsv, W("w"));
- fprintf(fp, "\"Method Name\",");
- fprintf(fp, "\"Method Index\",");
- fprintf(fp, "\"IL Bytes\",");
- fprintf(fp, "\"Basic Blocks\",");
- fprintf(fp, "\"Opt Level\",");
- fprintf(fp, "\"Loops Cloned\",");
-
- for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ // Write the header if the file is empty
+ if (ftell(fp) == 0)
{
- fprintf(fp, "\"%s\",", PhaseNames[i]);
- }
+ fprintf(fp, "\"Method Name\",");
+ fprintf(fp, "\"Method Index\",");
+ fprintf(fp, "\"IL Bytes\",");
+ fprintf(fp, "\"Basic Blocks\",");
+ fprintf(fp, "\"Opt Level\",");
+ fprintf(fp, "\"Loops Cloned\",");
- InlineStrategy::DumpCsvHeader(fp);
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ fprintf(fp, "\"%s\",", PhaseNames[i]);
+ }
- fprintf(fp, "\"Total Cycles\",");
- fprintf(fp, "\"CPS\"\n");
+ InlineStrategy::DumpCsvHeader(fp);
+
+ fprintf(fp, "\"Total Cycles\",");
+ fprintf(fp, "\"CPS\"\n");
+ }
+ fclose(fp);
}
- fclose(fp);
}
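One portability caveat with the rewrite above: ftell immediately after an append-mode fopen is only guaranteed to reflect the end of the file once a write (or an explicit seek) has occurred, so a more defensive variant seeks to the end before testing for emptiness. A hedged sketch (hypothetical helper, same intent):

// Hypothetical "does this file still need a header?" check that does not
// rely on the initial position of an "a"-mode stream.
#include <cstdio>

static bool FileNeedsHeader(FILE* fp)
{
    if (fseek(fp, 0, SEEK_END) != 0)
    {
        return false; // can't tell; don't risk writing a duplicate header
    }
    return ftell(fp) == 0; // empty file => header not yet written
}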
extern ICorJitHost* g_jitHost;
@@ -7235,7 +7835,7 @@ extern ICorJitHost* g_jitHost;
void JitTimer::PrintCsvMethodStats(Compiler* comp)
{
LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
- if (jitTimeLogCsv == NULL)
+ if (jitTimeLogCsv == nullptr)
{
return;
}
@@ -7265,7 +7865,9 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
for (int i = 0; i < PHASE_NUMBER_OF; i++)
{
if (!PhaseHasChildren[i])
+ {
totCycles += m_info.m_cyclesByPhase[i];
+ }
fprintf(fp, "%I64u,", m_info.m_cyclesByPhase[i]);
}
@@ -7277,23 +7879,14 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
}
// Completes the timing of the current method, and adds it to "sum".
-void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum)
+void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases)
{
-#ifdef DEBUG
- unsigned __int64 totCycles2 = 0;
- for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ if (includePhases)
{
- if (!PhaseHasChildren[i])
- totCycles2 += m_info.m_cyclesByPhase[i];
+ PrintCsvMethodStats(comp);
}
- // We include m_parentPhaseEndSlop in the next phase's time also (we probably shouldn't)
- // totCycles2 += m_info.m_parentPhaseEndSlop;
- assert(totCycles2 == m_info.m_totalCycles);
-#endif
-
- PrintCsvMethodStats(comp);
- sum.AddInfo(m_info);
+ sum.AddInfo(m_info, includePhases);
}
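Per the AddInfo comment earlier, includePhases distinguishes a full compile (phase data valid, per-method CSV row emitted) from an import-only run done for the inliner (only EE-call overhead is summarized). A hedged sketch of a call site, with illustrative names (s_compTimeSummary is assumed here to be the process-wide summary object):

// Hypothetical end-of-compile hook showing how includePhases might be chosen.
void EndCompileTiming(JitTimer* timer, Compiler* comp, bool isImportOnly)
{
    if (timer != nullptr)
    {
        // Import-only runs have no valid per-phase data, so pass "false"
        // and record only the EE-call overhead in the summary.
        timer->Terminate(comp, CompTimeSummaryInfo::s_compTimeSummary, !isImportOnly);
    }
}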
#endif // FEATURE_JIT_METHOD_PERF
@@ -7331,6 +7924,10 @@ void Compiler::MemStats::PrintByKind(FILE* f)
void Compiler::AggregateMemStats::Print(FILE* f)
{
fprintf(f, "For %9u methods:\n", nMethods);
+ if (nMethods == 0)
+ {
+ return;
+ }
fprintf(f, " count: %12u (avg %7u per method)\n", allocCnt, allocCnt / nMethods);
fprintf(f, " alloc size : %12llu (avg %7llu per method)\n", allocSz, allocSz / nMethods);
fprintf(f, " max alloc : %12llu\n", allocSzMax);
@@ -8520,6 +9117,9 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree)
break;
case GT_MUL:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
if (tree->gtFlags & GTF_MUL_64RSLT)
{
@@ -10124,11 +10724,6 @@ void cNodeIR(Compiler* comp, GenTree* tree)
}
break;
- case GT_STORE_CLS_VAR:
-
- chars += printf(" ???");
- break;
-
case GT_LEA:
GenTreeAddrMode* lea = tree->AsAddrMode();