diff options
Diffstat (limited to 'src/jit')
31 files changed, 702 insertions, 452 deletions
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt index b18b8cf3be..b492cf7a9c 100644 --- a/src/jit/CMakeLists.txt +++ b/src/jit/CMakeLists.txt @@ -32,6 +32,7 @@ set( JIT_SOURCES gentree.cpp gschecks.cpp hashbv.cpp + hostallocator.cpp importer.cpp inline.cpp inlinepolicy.cpp diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index b9e6d50eae..a1a04c2d3b 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -6585,11 +6585,24 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode) { if (hasGCpointers) { - // We have GC pointers use two ldr instructions + // We have GC pointers use two ldr instructions + // + // We do it this way because we can't currently pass or track + // two different emitAttr values for a ldp instruction. - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset); - noway_assert(REG_NEXT(targetReg) != addrReg); - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE); + // Make sure that the first load instruction does not overwrite the addrReg. 
+ // + if (targetReg != addrReg) + { + getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset); + getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE); + } + else + { + assert(REG_NEXT(targetReg) != addrReg); + getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset); + } } else { @@ -6597,7 +6610,7 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode) getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, targetReg, REG_NEXT(targetReg), addrReg, structOffset); } - remainingSize = 0; // We have completely wrote the 16-byte struct + remainingSize = 0; // We completely wrote the 16-byte struct } while (remainingSize > 0) @@ -6642,7 +6655,10 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode) instruction loadIns = ins_Load(loadType); emitAttr loadAttr = emitAttr(loadSize); - noway_assert(targetReg != addrReg); + // When deferLoad is false, targetReg can be the same as addrReg + // because the last instruction is allowed to overwrite addrReg. + // + noway_assert(!deferLoad || (targetReg != addrReg)); getEmitter()->emitIns_R_R_I(loadIns, loadAttr, targetReg, addrReg, structOffset); } diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index caa0a21b57..06a7e3e2c6 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -4069,28 +4069,21 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, slots = 1; #if FEATURE_MULTIREG_ARGS -#ifdef _TARGET_ARM64_ - if (varDsc->TypeGet() == TYP_STRUCT) + if (varDsc->lvIsMultiregStruct()) { - if (varDsc->lvExactSize > REGSIZE_BYTES) - { - assert(varDsc->lvExactSize <= 2*REGSIZE_BYTES); - - // Note that regArgNum+1 represents an argument index not an actual argument register. 
- // see genMapRegArgNumToRegNum(unsigned argNum, var_types type) + // Note that regArgNum+1 represents an argument index not an actual argument register. + // see genMapRegArgNumToRegNum(unsigned argNum, var_types type) - // This is the setup for the second half of a MULTIREG struct arg - noway_assert(regArgNum+1 < regState->rsCalleeRegArgNum); - // we better not have added it already (there better not be multiple vars representing this argument register) - noway_assert(regArgTab[regArgNum+1].slot == 0); + // This is the setup for the second half of a MULTIREG struct arg + noway_assert(regArgNum+1 < regState->rsCalleeRegArgNum); + // we better not have added it already (there better not be multiple vars representing this argument register) + noway_assert(regArgTab[regArgNum+1].slot == 0); - regArgTab[regArgNum+1].varNum = varNum; - regArgTab[regArgNum+1].slot = 2; + regArgTab[regArgNum+1].varNum = varNum; + regArgTab[regArgNum+1].slot = 2; - slots++; - } + slots = 2; } -#endif // _TARGET_ARM64_ #endif // FEATURE_MULTIREG_ARGS } diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 94c031423b..6a9e38e438 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -176,8 +176,67 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while // executing GS cookie check will not collect the object pointed to by EAX. - if (!pushReg && (compiler->info.compRetType == TYP_REF)) - gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + // + // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX + // In such case make sure that the correct GC-ness of RDX is reported as well, so + // a GC object pointed by RDX will not be collected. + if (!pushReg) + { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Handling struct returned in two registers (only applicable to System V systems)... 
+ if (compiler->compMethodReturnsMultiRegRetType()) + { + // Get the return type of the struct. + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR retStructDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(compiler->info.compMethodInfo->args.retTypeClass, &retStructDesc); + + assert(retStructDesc.passedInRegisters); + + // In case the return type is a two-register-return, the native return type should be a struct + assert(varTypeIsStruct(compiler->info.compRetNativeType)); + + assert(retStructDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + + unsigned __int8 offset0 = 0; + unsigned __int8 offset1 = 0; + + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Set the GC-ness of the struct return registers. + getStructTypeOffset(retStructDesc, &type0, &type1, &offset0, &offset1); + gcInfo.gcMarkRegPtrVal(REG_INTRET, type0); + gcInfo.gcMarkRegPtrVal(REG_INTRET_1, type1); + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + if (compiler->compMethodReturnsRetBufAddr()) + { + // This is for returning in an implicit RetBuf. + // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef. + + // In case the return is in an implicit RetBuf, the native return type should be a struct + assert(varTypeIsStruct(compiler->info.compRetNativeType)); + + gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); + } + // ... all other cases. + else + { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // System V structs that are not returned in registers are always + // returned in an implicit RetBuf. If we reached here, we should not have
+ assert(compiler->info.compRetBuffArg == BAD_VAR_NUM); + assert(!varTypeIsStruct(compiler->info.compRetNativeType)); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + // For Windows we can't make such assertions since we generate code for returning of + // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise + // compRetNativeType could be TYP_STRUCT. + gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType); + } + } regNumber regGSCheck; if (!pushReg) @@ -185,14 +244,16 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) // Non-tail call: we can use any callee trash register that is not // a return register or contain 'this' pointer (keep alive this), since // we are generating GS cookie check after a GT_RETURN block. + // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well + // as return register for two-register-returned structs. if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister && - (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX)) + (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0)) { - regGSCheck = REG_RDX; + regGSCheck = REG_ARG_1; } else { - regGSCheck = REG_RCX; + regGSCheck = REG_ARG_0; } } else @@ -2615,12 +2676,8 @@ CodeGen::genStoreRegisterReturnInLclVar(GenTreePtr treeNode) assert(structDesc.passedInRegisters); - // TODO-Amd64-Unix: Have Lubo Review this change - // Test case JIT.opt.ETW.TailCallCases.TailCallCases has eightByteCount == 1 - // This occurs with a TYP_STRUCT that is 3 bytes in size - // commenting out this assert results in correct codegen - // - // assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + // The type of LclVars of TYP_STRUCT with one eightbyte is normalized. 
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); GenTreePtr op1 = treeNode->gtOp.gtOp1; genConsumeRegs(op1); diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index 264286679a..8146f3aa2b 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -14,6 +14,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifdef _MSC_VER #pragma hdrstop #endif // _MSC_VER +#include "hostallocator.h" #include "emit.h" #include "ssabuilder.h" #include "valuenum.h" @@ -246,10 +247,10 @@ NodeSizeStats genNodeSizeStats; NodeSizeStats genNodeSizeStatsPerFunc; unsigned genTreeNcntHistBuckets[] = { 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000, 5000, 10000, 0 }; -histo genTreeNcntHist(DefaultAllocator::Singleton(), genTreeNcntHistBuckets); +Histogram genTreeNcntHist(HostAllocator::getHostAllocator(), genTreeNcntHistBuckets); unsigned genTreeNsizHistBuckets[] = { 1000, 5000, 10000, 50000, 100000, 500000, 1000000, 0 }; -histo genTreeNsizHist(DefaultAllocator::Singleton(), genTreeNsizHistBuckets); +Histogram genTreeNsizHist(HostAllocator::getHostAllocator(), genTreeNsizHistBuckets); #endif // MEASURE_NODE_SIZE /***************************************************************************** @@ -300,16 +301,16 @@ unsigned argTotalGTF_ASGinArgs; unsigned argMaxTempsPerMethod; unsigned argCntBuckets[] = { 0, 1, 2, 3, 4, 5, 6, 10, 0 }; -histo argCntTable(DefaultAllocator::Singleton(), argCntBuckets); +Histogram argCntTable(HostAllocator::getHostAllocator(), argCntBuckets); unsigned argDWordCntBuckets[] = { 0, 1, 2, 3, 4, 5, 6, 10, 0 }; -histo argDWordCntTable(DefaultAllocator::Singleton(), argDWordCntBuckets); +Histogram argDWordCntTable(HostAllocator::getHostAllocator(), argDWordCntBuckets); unsigned argDWordLngCntBuckets[] = { 0, 1, 2, 3, 4, 5, 6, 10, 0 }; -histo argDWordLngCntTable(DefaultAllocator::Singleton(), argDWordLngCntBuckets); +Histogram argDWordLngCntTable(HostAllocator::getHostAllocator(), 
argDWordLngCntBuckets); unsigned argTempsCntBuckets[] = { 0, 1, 2, 3, 4, 5, 6, 10, 0 }; -histo argTempsCntTable(DefaultAllocator::Singleton(), argTempsCntBuckets); +Histogram argTempsCntTable(HostAllocator::getHostAllocator(), argTempsCntBuckets); #endif // CALL_ARG_STATS @@ -336,12 +337,12 @@ histo argTempsCntTable(DefaultAllocator::Singleton(), argTempsCntBuckets); // -------------------------------------------------- unsigned bbCntBuckets[] = { 1, 2, 3, 5, 10, 20, 50, 100, 1000, 10000, 0 }; -histo bbCntTable(DefaultAllocator::Singleton(), bbCntBuckets); +Histogram bbCntTable(HostAllocator::getHostAllocator(), bbCntBuckets); /* Histogram for the IL opcode size of methods with a single basic block */ unsigned bbSizeBuckets[] = { 1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 0 }; -histo bbOneBBSizeTable(DefaultAllocator::Singleton(), bbSizeBuckets); +Histogram bbOneBBSizeTable(HostAllocator::getHostAllocator(), bbSizeBuckets); #endif // COUNT_BASIC_BLOCKS @@ -373,12 +374,12 @@ bool loopOverflowThisMethod; // True if we exceeded the max # of loop /* Histogram for number of loops in a method */ unsigned loopCountBuckets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0 }; -histo loopCountTable(DefaultAllocator::Singleton(), loopCountBuckets); +Histogram loopCountTable(HostAllocator::getHostAllocator(), loopCountBuckets); /* Histogram for number of loop exits */ unsigned loopExitCountBuckets[] = { 0, 1, 2, 3, 4, 5, 6, 0 }; -histo loopExitCountTable(DefaultAllocator::Singleton(), loopExitCountBuckets); +Histogram loopExitCountTable(HostAllocator::getHostAllocator(), loopExitCountBuckets); #endif // COUNT_LOOPS @@ -463,7 +464,7 @@ void Compiler::getStructGcPtrsFromOp(GenTreePtr op, BYTE *gcPtrsOut) assert(varNum < lvaCount); LclVarDsc* varDsc = &lvaTable[varNum]; - // At this point any TYP_STRUCT LclVar must be a 16-byte pass by valeu argument + // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument assert(varDsc->lvSize() == 2 * 
TARGET_POINTER_SIZE); gcPtrsOut[0] = varDsc->lvGcLayout[0]; @@ -890,7 +891,7 @@ void Compiler::compShutdown() fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "Basic block count frequency table:\n"); fprintf(fout, "--------------------------------------------------\n"); - bbCntTable.histoDsp(fout); + bbCntTable.dump(fout); fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "\n"); @@ -898,7 +899,7 @@ void Compiler::compShutdown() fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "IL method size frequency table for methods with a single basic block:\n"); fprintf(fout, "--------------------------------------------------\n"); - bbOneBBSizeTable.histoDsp(fout); + bbOneBBSizeTable.dump(fout); fprintf(fout, "--------------------------------------------------\n"); #endif // COUNT_BASIC_BLOCKS @@ -922,11 +923,11 @@ void Compiler::compShutdown() fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "Loop count frequency table:\n"); fprintf(fout, "--------------------------------------------------\n"); - loopCountTable.histoDsp(fout); + loopCountTable.dump(fout); fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "Loop exit count frequency table:\n"); fprintf(fout, "--------------------------------------------------\n"); - loopExitCountTable.histoDsp(fout); + loopExitCountTable.dump(fout); fprintf(fout, "--------------------------------------------------\n"); #endif // COUNT_LOOPS @@ -960,12 +961,12 @@ void Compiler::compShutdown() fprintf(fout, "\n"); fprintf(fout, "---------------------------------------------------\n"); fprintf(fout, "Distribution of per-method GenTree node counts:\n"); - genTreeNcntHist.histoDsp(fout); + genTreeNcntHist.dump(fout); fprintf(fout, "\n"); fprintf(fout, "---------------------------------------------------\n"); fprintf(fout, "Distribution of per-method GenTree node allocations 
(in bytes):\n"); - genTreeNsizHist.histoDsp(fout); + genTreeNsizHist.dump(fout); #endif // MEASURE_NODE_SIZE @@ -1408,7 +1409,6 @@ void Compiler::compInit(ArenaAllocator * pAlloc, InlineInfo * inl compMaxUncheckedOffsetForNullObject = MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT; compNativeSizeEstimate = NATIVE_SIZE_INVALID; - compInlineeHints = (InlineHints)0; for (unsigned i = 0; i < MAX_LOOP_NUM; i++) { @@ -2113,7 +2113,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) { // NOTE: The Assembly name list is allocated in the process heap, not in the no-release heap, which is reclaimed // for every compilation. This is ok because we only allocate once, due to the static. - s_pAltJitExcludeAssembliesList = new (ProcessHeapAllocator::Singleton()) AssemblyNamesList2(wszAltJitExcludeAssemblyList, ProcessHeapAllocator::Singleton()); + s_pAltJitExcludeAssembliesList = new (HostAllocator::getHostAllocator()) AssemblyNamesList2(wszAltJitExcludeAssemblyList, HostAllocator::getHostAllocator()); } } @@ -4518,8 +4518,8 @@ void Compiler::compCompileFinish() #endif // LOOP_HOIST_STATS #if MEASURE_NODE_SIZE - genTreeNcntHist.histoRec(genNodeSizeStatsPerFunc.genTreeNodeCnt, 1); - genTreeNsizHist.histoRec(genNodeSizeStatsPerFunc.genTreeNodeSize, 1); + genTreeNcntHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeCnt)); + genTreeNsizHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeSize)); #endif #if defined(DEBUG) @@ -4924,89 +4924,112 @@ int Compiler::compCompileHelper (CORINFO_MODULE_HANDLE clas lvaInitTypeRef(); - bool hasBeenMarkedAsBadInlinee = false; - bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE); - if (!compIsForInlining()) { compInitDebuggingInfo(); - - if (opts.eeFlags & CORJIT_FLG_PREJIT) - { - // Cache inlining hint during NGen to avoid touching bodies of non-inlineable methods at runtime - InlineResult trialResult(this, methodHnd, "prejit1"); - impCanInlineIL(methodHnd, methodInfo, forceInline, &trialResult); - if 
(trialResult.isFailure()) - { - // It is a bad inlinee according to impCanInlineIL. - // This decision better not be context-dependent. - assert(trialResult.isNever()); - - // Don't bother with the second stage of the evaluation for this method. - hasBeenMarkedAsBadInlinee = true; - } - else - { - // Since we're not actually inlining anything, don't report success. - trialResult.setReported(); - } - } } - /* Find and create the basic blocks */ + const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE); - fgFindBasicBlocks(); - if (compDonotInline()) + if (!compIsForInlining() && (opts.eeFlags & CORJIT_FLG_PREJIT)) { - goto _Next; - } + // We're prejitting the root method. We also will analyze it as + // a potential inline candidate. + InlineResult prejitResult(this, methodHnd, "prejit"); - // - // Now, we might have calculated the compNativeSizeEstimate in fgFindJumpTargets. - // If we haven't marked this method as a bad inlinee as a result of impCanInlineIL, - // check to see if it is a bad inlinee according to impCanInlineNative. - // - if (!compIsForInlining() && // We are compiling a method (not inlining one). - !hasBeenMarkedAsBadInlinee && // The method hasn't been marked as bad inlinee. - (opts.eeFlags & CORJIT_FLG_PREJIT) && // This is NGEN. - !forceInline && // The size of the method matters. - (methodInfo->ILCodeSize > ALWAYS_INLINE_SIZE)) - { - assert(methodInfo->ILCodeSize <= impInlineSize); // Otherwise it must have been marked as a bad inlinee by impCanInlineIL. - - // We must have run the CodeSeq state machine and got the native size estimate. - assert(compNativeSizeEstimate != NATIVE_SIZE_INVALID); - - int callsiteNativeSizeEstimate = impEstimateCallsiteNativeSize(methodInfo); - InlineResult trialResult(this, methodHnd, "prejit2"); - - impCanInlineNative(callsiteNativeSizeEstimate, - compNativeSizeEstimate, - compInlineeHints, - nullptr, // Calculate static inlining hint. - &trialResult); + // Do the initial inline screen. 
+ impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult); + + // Temporarily install the prejitResult as the + // compInlineResult so it's available to fgFindJumpTargets + // and can accumulate more observations as the IL is + // scanned. + // + // We don't pass prejitResult in as a parameter to avoid + // potential aliasing confusion -- the other call to + // fgFindBasicBlocks may have set up compInlineResult and + // the code in fgFindJumpTargets references that data + // member extensively. + assert(compInlineResult == nullptr); + compInlineResult = &prejitResult; + + // Find the basic blocks. We must do this regardless of + // inlineability, since we are prejitting this method. + // + // Among other things, this will set compNativeSizeEstimate + // for the subset of methods we check below. + fgFindBasicBlocks(); + + // Undo the temporary setup. + assert(compInlineResult == &prejitResult); + compInlineResult = nullptr; - if (trialResult.isFailure()) + // If this method is still a viable inline candidate, + // do the profitability screening. + if (prejitResult.isCandidate()) { - // Bingo! It is a bad inlinee according to impCanInlineNative. + // Only needed if the inline is discretionary (not forced) + // and the size is over the always threshold. + if (!forceInline && (methodInfo->ILCodeSize > ALWAYS_INLINE_SIZE)) + { + // We should have run the CodeSeq state machine + // and got the native size estimate. + assert(compNativeSizeEstimate != NATIVE_SIZE_INVALID); + + // Estimate the call site impact + int callsiteNativeSizeEstimate = impEstimateCallsiteNativeSize(methodInfo); + + // See if we're willing to pay for inlining this method + impCanInlineNative(callsiteNativeSizeEstimate, + compNativeSizeEstimate, + nullptr, // Calculate static inlining hint. + &prejitResult); + } + } + else + { + // If it's not a candidate, it should be a failure. + assert(prejitResult.isFailure()); + } + + // Handle the results of the inline analysis. 
+ if (prejitResult.isFailure()) + { + // This method is a bad inlinee according to our + // analysis. We will let the InlineResult destructor + // mark it as noinline in the prejit image to save the + // jit some work. + // // This decision better not be context-dependent. - assert(trialResult.isNever()); + assert(prejitResult.isNever()); } - else + else { - // Since we're not actually inlining, don't report success. - trialResult.setReported(); + // This looks like a viable inline candidate. Since + // we're not actually inlining, don't report anything. + prejitResult.setReported(); } } + else + { + // We are jitting the root method, or inlining. + fgFindBasicBlocks(); + } + + // If we're inlining and the candidate is bad, bail out. + if (compDonotInline()) + { + goto _Next; + } compSetOptimizationLevel(); #if COUNT_BASIC_BLOCKS - bbCntTable.histoRec(fgBBcount, 1); + bbCntTable.record(fgBBcount); if (fgBBcount == 1) { - bbOneBBSizeTable.histoRec(methodInfo->ILCodeSize, 1); + bbOneBBSizeTable.record(methodInfo->ILCodeSize); } #endif // COUNT_BASIC_BLOCKS @@ -6312,15 +6335,15 @@ void Compiler::compCallArgStats() argTempsThisMethod+= regArgTemp; - argCntTable.histoRec(argNum, 1); - argDWordCntTable.histoRec(argDWordNum, 1); - argDWordLngCntTable.histoRec(argDWordNum + 2*argLngNum, 1); + argCntTable.record(argNum); + argDWordCntTable.record(argDWordNum); + argDWordLngCntTable.record(argDWordNum + (2 * argLngNum)); #endif // LEGACY_BACKEND } } } - argTempsCntTable.histoRec(argTempsThisMethod, 1); + argTempsCntTable.record(argTempsThisMethod); if (argMaxTempsPerMethod < argTempsThisMethod) { @@ -6346,7 +6369,7 @@ void Compiler::compDispCallArgStats(FILE* fout) fprintf(fout, "Percentage of virtual calls = %4.2f %%\n", (float)(100 * argVirtualCalls ) / argTotalCalls); fprintf(fout, "Percentage of non-virtual calls = %4.2f %%\n\n", (float)(100 * argNonVirtualCalls) / argTotalCalls); - fprintf(fout, "Average # of arguments per call = %.2f%\n\n", (float) argTotalArgs / 
argTotalCalls); + fprintf(fout, "Average # of arguments per call = %.2f%%\n\n", (float) argTotalArgs / argTotalCalls); fprintf(fout, "Percentage of DWORD arguments = %.2f %%\n", (float)(100 * argTotalDWordArgs ) / argTotalArgs); fprintf(fout, "Percentage of LONG arguments = %.2f %%\n", (float)(100 * argTotalLongArgs ) / argTotalArgs); @@ -6380,26 +6403,26 @@ void Compiler::compDispCallArgStats(FILE* fout) fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "Argument count frequency table (includes ObjPtr):\n"); fprintf(fout, "--------------------------------------------------\n"); - argCntTable.histoDsp(fout); + argCntTable.dump(fout); fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "DWORD argument count frequency table (w/o LONG):\n"); fprintf(fout, "--------------------------------------------------\n"); - argDWordCntTable.histoDsp(fout); + argDWordCntTable.dump(fout); fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "Temps count frequency table (per method):\n"); fprintf(fout, "--------------------------------------------------\n"); - argTempsCntTable.histoDsp(fout); + argTempsCntTable.dump(fout); fprintf(fout, "--------------------------------------------------\n"); /* fprintf(fout, "--------------------------------------------------\n"); fprintf(fout, "DWORD argument count frequency table (w/ LONG):\n"); fprintf(fout, "--------------------------------------------------\n"); - argDWordLngCntTable.histoDsp(fout); + argDWordLngCntTable.dump(fout); fprintf(fout, "--------------------------------------------------\n"); */ } diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 39f1e37365..3f585f974a 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -598,6 +598,20 @@ public: return 
(unsigned)(roundUp(lvExactSize, sizeof(void*))); } + bool lvIsMultiregStruct() + { +#if FEATURE_MULTIREG_ARGS_OR_RET +#ifdef _TARGET_ARM64_ + if ((TypeGet() == TYP_STRUCT) && + (lvSize() == 2 * TARGET_POINTER_SIZE)) + { + return true; + } +#endif // _TARGET_ARM64_ +#endif // FEATURE_MULTIREG_ARGS_OR_RET + return false; + } + #if defined(DEBUGGING_SUPPORT) || defined(DEBUG) unsigned lvSlotNum; // original slot # (if remapped) #endif @@ -3098,7 +3112,6 @@ private: void impCanInlineNative(int callsiteNativeEstimate, int calleeNativeSizeEstimate, - InlineHints inlineHints, InlineInfo* pInlineInfo, InlineResult* inlineResult); @@ -8555,7 +8568,6 @@ public: #define NATIVE_SIZE_INVALID (-10000) int compNativeSizeEstimate; // The estimated native size of this method. - InlineHints compInlineeHints; // Inlining hints from the inline candidate. #ifdef DEBUG CodeSeqSM fgCodeSeqSm; // The code sequence state machine used in the inliner. @@ -8853,8 +8865,8 @@ extern size_t gcPtrMapNSize; */ #if COUNT_BASIC_BLOCKS -extern histo bbCntTable; -extern histo bbOneBBSizeTable; +extern Histogram bbCntTable; +extern Histogram bbOneBBSizeTable; #endif @@ -8881,8 +8893,8 @@ extern unsigned constIterLoopCount; // counts the # of loops with a extern bool hasMethodLoops; // flag to keep track if we already counted a method as having loops extern unsigned loopsThisMethod; // counts the number of loops in the current method extern bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method. 
-extern histo loopCountTable; // Histogram of loop counts -extern histo loopExitCountTable; // Histogram of loop exit counts +extern Histogram loopCountTable; // Histogram of loop counts +extern Histogram loopExitCountTable; // Histogram of loop exit counts #endif // COUNT_LOOPS @@ -8920,8 +8932,8 @@ struct NodeSizeStats }; extern NodeSizeStats genNodeSizeStats; // Total node size stats extern NodeSizeStats genNodeSizeStatsPerFunc; // Per-function node size stats -extern histo genTreeNcntHist; -extern histo genTreeNsizHist; +extern Histogram genTreeNcntHist; +extern Histogram genTreeNsizHist; #endif // MEASURE_NODE_SIZE /***************************************************************************** diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp index dc069fe0e6..da0ae901fe 100644 --- a/src/jit/ee_il_dll.cpp +++ b/src/jit/ee_il_dll.cpp @@ -22,7 +22,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ -static ICorJitHost* g_jitHost = nullptr; +ICorJitHost* g_jitHost = nullptr; static CILJit* ILJitter = 0; // The one and only JITTER I return #ifndef FEATURE_MERGE_JIT_AND_ENGINE HINSTANCE g_hInst = NULL; diff --git a/src/jit/ee_il_dll.hpp b/src/jit/ee_il_dll.hpp index c61deb57a1..b1e0327d6b 100644 --- a/src/jit/ee_il_dll.hpp +++ b/src/jit/ee_il_dll.hpp @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+extern ICorJitHost* g_jitHost; + class CILJit: public ICorJitCompiler { CorJitResult __stdcall compileMethod ( diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index 493d13cde0..2cd045b59a 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -16,6 +16,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma hdrstop #endif +#include "hostallocator.h" #include "instr.h" #include "emit.h" #include "codegen.h" @@ -264,13 +265,13 @@ static unsigned totActualSize; unsigned emitter::emitIFcounts[emitter::IF_COUNT]; static unsigned emitSizeBuckets[] = { 100, 1024*1, 1024*2, 1024*3, 1024*4, 1024*5, 1024*10, 0 }; -static histo emitSizeTable(DefaultAllocator::Singleton(), emitSizeBuckets); +static Histogram emitSizeTable(HostAllocator::getHostAllocator(), emitSizeBuckets); static unsigned GCrefsBuckets[] = { 0, 1, 2, 5, 10, 20, 50, 128, 256, 512, 1024, 0 }; -static histo GCrefsTable(DefaultAllocator::Singleton(), GCrefsBuckets); +static Histogram GCrefsTable(HostAllocator::getHostAllocator(), GCrefsBuckets); static unsigned stkDepthBuckets[] = { 0, 1, 2, 5, 10, 16, 32, 128, 1024, 0 }; -static histo stkDepthTable(DefaultAllocator::Singleton(), stkDepthBuckets); +static Histogram stkDepthTable(HostAllocator::getHostAllocator(), stkDepthBuckets); size_t emitter::emitSizeMethod; @@ -318,16 +319,16 @@ void emitterStaticStats(FILE* fout) fprintf(fout, "\n"); fprintf(fout, "insPlaceholderGroupData:\n"); - fprintf(fout, "Offset of igPhNext = %2u\n", offsetof(emitter::insPlaceholderGroupData, igPhNext )); - fprintf(fout, "Offset of igPhBB = %2u\n", offsetof(emitter::insPlaceholderGroupData, igPhBB )); - fprintf(fout, "Offset of igPhInitGCrefVars = %2u\n", offsetof(emitter::insPlaceholderGroupData, igPhInitGCrefVars )); - fprintf(fout, "Offset of igPhInitGCrefRegs = %2u\n", offsetof(emitter::insPlaceholderGroupData, igPhInitGCrefRegs )); - fprintf(fout, "Offset of igPhInitByrefRegs = %2u\n", offsetof(emitter::insPlaceholderGroupData, 
igPhInitByrefRegs )); - fprintf(fout, "Offset of igPhPrevGCrefVars = %2u\n", offsetof(emitter::insPlaceholderGroupData, igPhPrevGCrefVars )); - fprintf(fout, "Offset of igPhPrevGCrefRegs = %2u\n", offsetof(emitter::insPlaceholderGroupData, igPhPrevGCrefRegs )); - fprintf(fout, "Offset of igPhPrevByrefRegs = %2u\n", offsetof(emitter::insPlaceholderGroupData, igPhPrevByrefRegs )); - fprintf(fout, "Offset of igPhType = %2u\n", offsetof(emitter::insPlaceholderGroupData, igPhType )); - fprintf(fout, "Size of insPlaceholderGroupData = %u\n", sizeof( emitter::insPlaceholderGroupData )); + fprintf(fout, "Offset of igPhNext = %2u\n", offsetof(insPlaceholderGroupData, igPhNext )); + fprintf(fout, "Offset of igPhBB = %2u\n", offsetof(insPlaceholderGroupData, igPhBB )); + fprintf(fout, "Offset of igPhInitGCrefVars = %2u\n", offsetof(insPlaceholderGroupData, igPhInitGCrefVars )); + fprintf(fout, "Offset of igPhInitGCrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhInitGCrefRegs )); + fprintf(fout, "Offset of igPhInitByrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhInitByrefRegs )); + fprintf(fout, "Offset of igPhPrevGCrefVars = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevGCrefVars )); + fprintf(fout, "Offset of igPhPrevGCrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevGCrefRegs )); + fprintf(fout, "Offset of igPhPrevByrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevByrefRegs )); + fprintf(fout, "Offset of igPhType = %2u\n", offsetof(insPlaceholderGroupData, igPhType )); + fprintf(fout, "Size of insPlaceholderGroupData = %u\n", sizeof( insPlaceholderGroupData )); fprintf(fout, "\n"); fprintf(fout, "Size of tinyID = %2u\n", TINY_IDSC_SIZE); @@ -421,15 +422,15 @@ void emitterStats(FILE* fout) } fprintf(fout, "Descriptor size distribution:\n"); - emitSizeTable.histoDsp(fout); + emitSizeTable.dump(fout); fprintf(fout, "\n"); fprintf(fout, "GC ref frame variable counts:\n"); - GCrefsTable.histoDsp(fout); + GCrefsTable.dump(fout); 
fprintf(fout, "\n"); fprintf(fout, "Max. stack depth distribution:\n"); - stkDepthTable.histoDsp(fout); + stkDepthTable.dump(fout); fprintf(fout, "\n"); int i; @@ -4394,9 +4395,9 @@ unsigned emitter::emitEndCodeGen(Compiler *comp, emitFullGCinfo = fullPtrMap; #if EMITTER_STATS - GCrefsTable.histoRec(emitGCrFrameOffsCnt, 1); - emitSizeTable.histoRec(emitSizeMethod , 1); - stkDepthTable.histoRec(emitMaxStackDepth , 1); + GCrefsTable.record(emitGCrFrameOffsCnt); + emitSizeTable.record(static_cast<unsigned>(emitSizeMethod)); + stkDepthTable.record(emitMaxStackDepth); #endif // EMITTER_STATS // Default values, correct even if EMIT_TRACK_STACK_DEPTH is 0. diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp index 2b203499b4..d817b75c07 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -10824,7 +10824,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } bool isMulOverflow = false; bool isUnsignedMul = false; - instruction ins2 = INS_invalid; regNumber extraReg = REG_NA; if (dst->gtOverflowEx()) { @@ -10840,7 +10839,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { isMulOverflow = true; isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0); - ins2 = isUnsignedMul ? INS_umulh : INS_smulh; assert(intConst == nullptr); // overflow format doesn't support an int constant operand } else @@ -10856,43 +10854,66 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (isMulOverflow) { + // Make sure that we have an internal register + assert(genCountBits(dst->gtRsvdRegs) == 2); + + // There will be two bits set in tmpRegsMask. 
+ // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask' + regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum); + assert(tmpRegsMask != RBM_NONE); + regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask + extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask + if (isUnsignedMul) { - assert(genCountBits(dst->gtRsvdRegs) == 1); - extraReg = genRegNumFromMask(dst->gtRsvdRegs); + if (attr == EA_4BYTE) + { + // Compute 8 byte results from 4 byte by 4 byte multiplication. + emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); - // Compute the high result - emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum); + // Get the high result by shifting dst. + emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32); + } + else + { + assert(attr == EA_8BYTE); + // Compute the high result. + emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum); - emitIns_R_I(INS_cmp, EA_8BYTE, extraReg, 0); - codeGen->genCheckOverflow(dst); + // Now multiply without skewing the high result. + emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + } - // Now multiply without skewing the high result if no overflow. - emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + // zero-sign bit comparision to detect overflow. + emitIns_R_I(INS_cmp, attr, extraReg, 0); } else { - // Make sure that we have an internal register - assert(genCountBits(dst->gtRsvdRegs) == 2); - - // There will be two bits set in tmpRegsMask. - // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask' - regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum); - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask + int bitShift = 0; + if (attr == EA_4BYTE) + { + // Compute 8 byte results from 4 byte by 4 byte multiplication. 
+ emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); - // Make sure the two registers are not the same. - assert(extraReg != dst->gtRegNum); + // Get the high result by shifting dst. + emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32); - // Save the high result in a temporary register - emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum); + bitShift = 31; + } + else + { + assert(attr == EA_8BYTE); + // Save the high result in a temporary register. + emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum); - // Now multiply without skewing the high result. - emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + // Now multiply without skewing the high result. + emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); - emitIns_R_R_I(INS_cmp, EA_8BYTE, extraReg, dst->gtRegNum, 63, INS_OPTS_ASR); + bitShift = 63; + } - codeGen->genCheckOverflow(dst); + // Sign bit comparision to detect overflow. 
+ emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR); } } else @@ -10902,7 +10923,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } } - if (dst->gtOverflowEx() && !isMulOverflow) + if (dst->gtOverflowEx()) { assert(!varTypeIsFloating(dst)); codeGen->genCheckOverflow(dst); diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 4ca70e34ab..15af9aa848 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -2569,9 +2569,9 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) { GenTreeIndir* mem = node->AsIndir(); - if (mem->HasBase() && mem->Base()->OperGet() == GT_CLS_VAR_ADDR) + if (mem->Addr()->OperGet() == GT_CLS_VAR_ADDR) { - emitIns_R_C(ins, attr, node->gtRegNum, mem->Base()->gtClsVar.gtClsVarHnd, 0); + emitIns_R_C(ins, attr, node->gtRegNum, mem->Addr()->gtClsVar.gtClsVarHnd, 0); return; } else if (mem->Addr()->OperGet() == GT_LCL_VAR_ADDR) @@ -2586,7 +2586,6 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) GenTreePtr addr = mem->Addr(); assert (addr->OperIsAddrMode() || - addr->gtOper == GT_CLS_VAR_ADDR || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained()); size_t offset = mem->Offset(); @@ -2618,15 +2617,15 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) size_t offset = mem->Offset(); GenTree* data = node->gtOp.gtOp2; - if ((memBase != nullptr) && (memBase->OperGet() == GT_CLS_VAR_ADDR)) + if (mem->Addr()->OperGet() == GT_CLS_VAR_ADDR) { if (data->isContained()) { - emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0, (int) data->AsIntConCommon()->IconValue()); + emitIns_C_I(ins, attr, mem->Addr()->gtClsVar.gtClsVarHnd, 0, (int) data->AsIntConCommon()->IconValue()); } else { - emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, data->gtRegNum, 0); + emitIns_C_R(ins, attr, mem->Addr()->gtClsVar.gtClsVarHnd, data->gtRegNum, 0); } return; } diff --git a/src/jit/flowgraph.cpp 
b/src/jit/flowgraph.cpp index 283c8ea32f..1bb3e1db88 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -4335,10 +4335,7 @@ DECODE_OPCODE: noway_assert(codeAddr < codeEndp - sz); if ((OPCODE) getU1LittleEndian(codeAddr + sz) == CEE_RET) { - compInlineeHints = (InlineHints)(compInlineeHints | InlLooksLikeWrapperMethod); -#ifdef DEBUG - //printf("CALL->RET pattern found in %s\n", info.compFullName); -#endif + compInlineResult->note(InlineObservation::CALLEE_LOOKS_LIKE_WRAPPER); } } break; @@ -4595,14 +4592,13 @@ INL_HANDLE_COMPARE: unsigned slot0 = pushedStack.getSlot0(); if (fgStack::isArgument(slot0)) { - compInlineeHints = (InlineHints)(compInlineeHints | InlArgFeedsConstantTest); + compInlineResult->note(InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST); //Check for the double whammy of an incoming constant argument feeding a //constant test. varNum = fgStack::slotTypeToArgNum(slot0); if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst()) { - compInlineeHints = (InlineHints)(compInlineeHints - | InlIncomingConstFeedsCond); + compInlineResult->note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST); } } } @@ -4617,13 +4613,13 @@ INL_HANDLE_COMPARE: if ((fgStack::isConstant(slot0) && fgStack::isArgument(slot1)) ||(fgStack::isConstant(slot1) && fgStack::isArgument(slot0))) { - compInlineeHints = (InlineHints)(compInlineeHints | InlArgFeedsConstantTest); + compInlineResult->note(InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST); } //Arg feeds range check if ((fgStack::isArrayLen(slot0) && fgStack::isArgument(slot1)) ||(fgStack::isArrayLen(slot1) && fgStack::isArgument(slot0))) { - compInlineeHints = (InlineHints)(compInlineeHints | InlArgFeedsRngChk); + compInlineResult->note(InlineObservation::CALLEE_ARG_FEEDS_RANGE_CHECK); } //Check for an incoming arg that's a constant. 
@@ -4632,7 +4628,7 @@ INL_HANDLE_COMPARE: varNum = fgStack::slotTypeToArgNum(slot0); if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst()) { - compInlineeHints = (InlineHints)(compInlineeHints | InlIncomingConstFeedsCond); + compInlineResult->note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST); } } if (fgStack::isArgument(slot1)) @@ -4640,7 +4636,7 @@ INL_HANDLE_COMPARE: varNum = fgStack::slotTypeToArgNum(slot1); if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst()) { - compInlineeHints = (InlineHints)(compInlineeHints | InlIncomingConstFeedsCond); + compInlineResult->note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST); } } } @@ -4824,7 +4820,15 @@ TOO_FAR: //This allows for CALL, RET, and one more non-ld/st instruction. if ((opts.instrCount - ldStCount) < 4 || ((double)ldStCount/(double)opts.instrCount) > .90) { - compInlineeHints = (InlineHints)(compInlineeHints | InlMethodMostlyLdSt); + // Note this is the one and only case where we don't guard the + // observation with compIsForInlining(). The prejit root must + // also make this observation. We'll fix this eventually as we + // make the LegacyPolicy smarter about what observations it + // cares about, and when. + if (compInlineResult != nullptr) + { + compInlineResult->note(InlineObservation::CALLEE_IS_MOSTLY_LOAD_STORE); + } } if (pSm) @@ -4842,7 +4846,6 @@ TOO_FAR: if (compIsForInlining()) { - // If the inlining decision was obvious from the size of the IL, // it should have been made earlier. noway_assert(codeSize > ALWAYS_INLINE_SIZE && codeSize <= impInlineSize); @@ -4852,7 +4855,6 @@ TOO_FAR: impCanInlineNative(callsiteNativeSizeEstimate, compNativeSizeEstimate, - compInlineeHints, impInlineInfo, compInlineResult); @@ -4875,9 +4877,9 @@ TOO_FAR: { if (compIsForInlining()) { - // This method's IL was small enough that we didn't use the size model to estimate - // inlinability. Note that as the latest candidate reason. 
- compInlineResult->noteCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE); + // This method's IL was small enough that we didn't use the size model to estimate + // inlinability. Note that as the latest candidate reason. + compInlineResult->noteCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE); } } @@ -21478,7 +21480,7 @@ void Compiler::fgInline() fgDispHandlerTab(); } - if (verbose || (fgInlinedCount > 0 && fgPrintInlinedMethods)) + if (verbose || fgPrintInlinedMethods) { printf("**************** Inline Tree\n"); rootContext->Dump(this); @@ -21949,7 +21951,14 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, if (result != CORJIT_OK) { - pParam->inlineInfo->inlineResult->noteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE); + // If we haven't yet determined why this inline fails, use + // a catch-all something bad happened observation. + InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult; + + if (!innerInlineResult->isFailure()) + { + innerInlineResult->noteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE); + } } } } @@ -21962,7 +21971,13 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, eeGetMethodFullName(fncHandle)); } #endif // DEBUG - inlineResult->noteFatal(InlineObservation::CALLSITE_COMPILATION_ERROR); + + // If we haven't yet determined why this inline fails, use + // a catch-all something bad happened observation. 
+ if (!inlineResult->isFailure()) + { + inlineResult->noteFatal(InlineObservation::CALLSITE_COMPILATION_ERROR); + } } endErrorTrap(); diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 9ce8f59b32..d7d2b64dd9 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -4544,7 +4544,7 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) break; #if !FEATURE_MULTIREG_ARGS - // Note that when FEATURE_MULTIREG__ARGS==1 + // Note that when FEATURE_MULTIREG_ARGS==1 // a GT_LDOBJ node is handled above by the default case case GT_LDOBJ: // Any GT_LDOBJ with a field must be lowered before this point. diff --git a/src/jit/hostallocator.cpp b/src/jit/hostallocator.cpp new file mode 100644 index 0000000000..b737424ee8 --- /dev/null +++ b/src/jit/hostallocator.cpp @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#include "jitpch.h" +#include "hostallocator.h" + +HostAllocator HostAllocator::s_hostAllocator; + +void* HostAllocator::Alloc(size_t size) +{ + assert(g_jitHost != nullptr); + return g_jitHost->allocateMemory(size, false); +} + +void* HostAllocator::ArrayAlloc(size_t elemSize, size_t numElems) +{ + assert(g_jitHost != nullptr); + + ClrSafeInt<size_t> safeElemSize(elemSize); + ClrSafeInt<size_t> safeNumElems(numElems); + ClrSafeInt<size_t> size = safeElemSize * safeNumElems; + if (size.IsOverflow()) + { + return nullptr; + } + + return g_jitHost->allocateMemory(size.Value(), false); +} + +void HostAllocator::Free(void* p) +{ + assert(g_jitHost != nullptr); + g_jitHost->freeMemory(p, false); +} + +HostAllocator* HostAllocator::getHostAllocator() +{ + return &s_hostAllocator; +} diff --git a/src/jit/hostallocator.h b/src/jit/hostallocator.h new file mode 100644 index 0000000000..c51eccc75e --- /dev/null +++ b/src/jit/hostallocator.h @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +class HostAllocator : public IAllocator +{ +private: + static HostAllocator s_hostAllocator; + + HostAllocator() {} + +public: + void* Alloc(size_t size) override; + + void* ArrayAlloc(size_t elemSize, size_t numElems) override; + + void Free(void* p) override; + + static HostAllocator* getHostAllocator(); +}; diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 4d063c7217..9bb76f118c 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -15522,7 +15522,6 @@ int Compiler::impEstimateCallsiteNativeSize(CORINFO_METHOD_INFO * methInfo) void Compiler::impCanInlineNative(int callsiteNativeEstimate, int calleeNativeSizeEstimate, - InlineHints inlineHints, InlineInfo* pInlineInfo, // NULL for static inlining hint for ngen. 
InlineResult* inlineResult) { @@ -15546,116 +15545,36 @@ void Compiler::impCanInlineNative(int callsiteNativeEstima } #endif - - //Compute all the static information first. - double multiplier = 0.0; - - // Increase the multiplier for instance constructors. + // Note if this method is an instance constructor if ((info.compFlags & CORINFO_FLG_CONSTRUCTOR) != 0 && (info.compFlags & CORINFO_FLG_STATIC) == 0) { - multiplier += 1.5; - -#ifdef DEBUG - if (verbose) - { - printf("\nmultiplier in instance constructors increased to %g.", (double)multiplier); - } -#endif - + inlineResult->note(InlineObservation::CALLEE_IS_INSTANCE_CTOR); } - // Bump up the multiplier for methods in promotable struct + // Note if this method's class is a promotable struct if ((info.compClassAttr & CORINFO_FLG_VALUECLASS) != 0) { lvaStructPromotionInfo structPromotionInfo; lvaCanPromoteStructType(info.compClassHnd, &structPromotionInfo, false); if (structPromotionInfo.canPromote) - { - multiplier += 3; - -#ifdef DEBUG - if (verbose) - { - printf("\nmultiplier in methods of promotable struct increased to %g.", multiplier); - } -#endif + { + inlineResult->note(InlineObservation::CALLEE_CLASS_PROMOTABLE); } } - //Check the rest of the static hints - if (inlineHints & InlLooksLikeWrapperMethod) - { - multiplier += 1.0; -#ifdef DEBUG - if (verbose) - printf("\nInline candidate looks like a wrapper method. Multipler increased to %g.", multiplier); -#endif - } - if (inlineHints & InlArgFeedsConstantTest) - { - multiplier += 1.0; -#ifdef DEBUG - if (verbose) - printf("\nInline candidate has an arg that feeds a constant test. Multipler increased to %g.", multiplier); -#endif - } - //Consider making this the same as "always inline" - if (inlineHints & InlMethodMostlyLdSt) - { - multiplier += 3.0; -#ifdef DEBUG - if (verbose) - printf("\nInline candidate is mostly loads and stores. 
Multipler increased to %g.", multiplier); -#endif - } -#if 0 - if (inlineHints & InlMethodContainsCondThrow) - { - multiplier += 1.5; -#ifdef DEBUG - if (verbose) - printf("\nInline candidate contains a conditional throw. Multipler increased to %g.", multiplier); -#endif - } -#endif - #ifdef FEATURE_SIMD + + // Note if this method is has SIMD args or return value if (pInlineInfo != nullptr && pInlineInfo->hasSIMDTypeArgLocalOrReturn) { - // The default value of the SIMD multiplier is set in clrconfigvalues.h. - // The current default value (3) addresses the inlining issues raised by the BepuPhysics benchmark - // (which required at least a value of 3), and appears to have a mild positive impact on ConsoleMandel - // (which only seemed to require a value of 1 to get the benefit). For most of the benchmarks, the - // effect of different values was within the standard deviation. This may be a place where future tuning - // (with additional benchmarks) would be valuable. - - static ConfigDWORD fJitInlineSIMDMultiplier; - int simdMultiplier = fJitInlineSIMDMultiplier.val(CLRConfig::INTERNAL_JitInlineSIMDMultiplier); - - multiplier += simdMultiplier; - JITDUMP("\nInline candidate has SIMD type args, locals or return value. Multipler increased to %g.", multiplier); + inlineResult->note(InlineObservation::CALLEE_HAS_SIMD); } -#endif // FEATURE_SIMD - if (inlineHints & InlArgFeedsRngChk) - { - multiplier += 0.5; -#ifdef DEBUG - if (verbose) - printf("\nInline candidate has arg that feeds range check. Multipler increased to %g.", multiplier); -#endif - } +#endif // FEATURE_SIMD - //Handle the dynamic flags. - if (inlineHints & InlIncomingConstFeedsCond) - { - multiplier += 3; -#ifdef DEBUG - if (verbose) - printf("\nInline candidate has const arg that conditional. Multipler increased to %g.", multiplier); -#endif - } + // Determine base multiplier given the various observations made so far. + double multiplier = inlineResult->determineMultiplier(); //Because it is an if ... 
else if, keep them in sorted order by multiplier. This ensures that we always //get the maximum multipler. Also, pInlineInfo is null for static hints. Make sure that the first case diff --git a/src/jit/inline.cpp b/src/jit/inline.cpp index 973c9bb025..b0519f2e19 100644 --- a/src/jit/inline.cpp +++ b/src/jit/inline.cpp @@ -660,7 +660,7 @@ void InlineResult::report() if (VERBOSE) { const char* obsString = inlGetObservationString(obs); - JITDUMP("INLINER: Marking %s as NOINLINE because of %s", callee, obsString); + JITDUMP("\nINLINER: Marking %s as NOINLINE because of %s\n", callee, obsString); } #endif // DEBUG diff --git a/src/jit/inline.def b/src/jit/inline.def index d602af8135..2678ee3051 100644 --- a/src/jit/inline.def +++ b/src/jit/inline.def @@ -65,18 +65,25 @@ INLINE_OBSERVATION(UNSUPPORTED_OPCODE, bool, "unsupported opcode", // ------ Callee Performance ------- -INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", PERFORMANCE, CALLEE) INLINE_OBSERVATION(LDFLD_STATIC_VALUECLASS, bool, "ldsfld of value class", PERFORMANCE, CALLEE) INLINE_OBSERVATION(TOO_MANY_BASIC_BLOCKS, bool, "too many basic blocks", PERFORMANCE, CALLEE) INLINE_OBSERVATION(TOO_MUCH_IL, bool, "too many il bytes", PERFORMANCE, CALLEE) // ------ Callee Information ------- +INLINE_OBSERVATION(ARG_FEEDS_CONSTANT_TEST, bool, "argument feeds constant test", INFORMATION, CALLEE) +INLINE_OBSERVATION(ARG_FEEDS_RANGE_CHECK, bool, "argument feeds range check", INFORMATION, CALLEE) INLINE_OBSERVATION(BELOW_ALWAYS_INLINE_SIZE, bool, "below ALWAYS_INLINE size", INFORMATION, CALLEE) INLINE_OBSERVATION(CAN_INLINE_IL, bool, "IL passes basic checks", INFORMATION, CALLEE) INLINE_OBSERVATION(CHECK_CAN_INLINE_IL, bool, "IL passes detailed checks", INFORMATION, CALLEE) +INLINE_OBSERVATION(CLASS_PROMOTABLE, bool, "promotable value class", INFORMATION, CALLEE) +INLINE_OBSERVATION(HAS_SIMD, bool, "has SIMD arg, local, or ret", INFORMATION, CALLEE) +INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", INFORMATION, CALLEE) 
INLINE_OBSERVATION(IS_FORCE_INLINE, bool, "aggressive inline attribute", INFORMATION, CALLEE) +INLINE_OBSERVATION(IS_INSTANCE_CTOR, bool, "instance constructor", INFORMATION, CALLEE) +INLINE_OBSERVATION(LOOKS_LIKE_WRAPPER, bool, "thin wrapper around a call", INFORMATION, CALLEE) INLINE_OBSERVATION(MAXSTACK, int, "maxstack", INFORMATION, CALLEE) +INLINE_OBSERVATION(IS_MOSTLY_LOAD_STORE, bool, "method is mostly load/store", INFORMATION, CALLEE) INLINE_OBSERVATION(NATIVE_SIZE_ESTIMATE, double, "native size estimate", INFORMATION, CALLEE) INLINE_OBSERVATION(NUMBER_OF_ARGUMENTS, int, "number of arguments", INFORMATION, CALLEE) INLINE_OBSERVATION(NUMBER_OF_BASIC_BLOCKS, int, "number of basic blocks", INFORMATION, CALLEE) @@ -136,7 +143,8 @@ INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals", INLINE_OBSERVATION(ARGS_OK, bool, "arguments suitable", INFORMATION, CALLSITE) INLINE_OBSERVATION(BENEFIT_MULTIPLIER, double, "benefit multiplier", INFORMATION, CALLSITE) -INLINE_OBSERVATION(DEPTH, int, "depth" , INFORMATION, CALLSITE) +INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE) +INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE) INLINE_OBSERVATION(LOCALS_OK, bool, "locals suitable", INFORMATION, CALLSITE) INLINE_OBSERVATION(NATIVE_SIZE_ESTIMATE, double, "native size estimate", INFORMATION, CALLSITE) INLINE_OBSERVATION(NATIVE_SIZE_ESTIMATE_OK, bool, "native size estimate ok", INFORMATION, CALLSITE) diff --git a/src/jit/inline.h b/src/jit/inline.h index 5a4012bdd9..75367faca9 100644 --- a/src/jit/inline.h +++ b/src/jit/inline.h @@ -13,7 +13,6 @@ // InlineTarget - target of a particular observation // InlineImpact - impact of a particular observation // InlineObservation - facts observed when considering an inline -// InlineHints - alternative form of observations // // -- CLASSES -- // @@ -63,10 +62,10 @@ // In DEBUG, the jit also searches for non-candidate calls to try // and get a complete picture of 
the set of failed inlines. // -// 4 & 5. Prejit suitability screens (compCompileHelper) +// 4. Prejit suitability screen (compCompileHelper) // // When prejitting, each method is scanned to see if it is a viable -// inline candidate. The scanning happens in two stages. +// inline candidate. // // A note on InlinePolicy // @@ -228,7 +227,10 @@ public: virtual void noteInt(InlineObservation obs, int value) = 0; virtual void noteDouble(InlineObservation obs, double value) = 0; - // Policy decisions + // Policy determinations + virtual double determineMultiplier() = 0; + + // Policy policies virtual bool propagateNeverToRuntime() const = 0; #ifdef DEBUG @@ -361,6 +363,12 @@ public: inlPolicy->noteDouble(obs, value); } + // Determine the benfit multiplier for this inline. + double determineMultiplier() + { + return inlPolicy->determineMultiplier(); + } + // Ensure details of this inlining process are appropriately // reported when the result goes out of scope. ~InlineResult() @@ -471,29 +479,6 @@ struct InlLclVarInfo bool lclHasLdlocaOp; // Is there LDLOCA(s) operation on this argument? }; -// InlineHints are a legacy form of observations. - -enum InlineHints -{ - //Static inline hints are here. - InlLooksLikeWrapperMethod = 0x0001, // The inline candidate looks like it's a simple wrapper method. - - InlArgFeedsConstantTest = 0x0002, // One or more of the incoming arguments feeds into a test - //against a constant. This is a good candidate for assertion - //prop. - - InlMethodMostlyLdSt = 0x0004, //This method is mostly loads and stores. - - InlMethodContainsCondThrow= 0x0008, //Method contains a conditional throw, so it does not bloat the - //code as much. - InlArgFeedsRngChk = 0x0010, //Incoming arg feeds an array bounds check. A good assertion - //prop candidate. - - //Dynamic inline hints are here. Only put hints that add to the multiplier in here. - InlIncomingConstFeedsCond = 0x0100, //Incoming argument is constant and feeds a conditional. 
- InlAllDynamicHints = InlIncomingConstFeedsCond -}; - // InlineInfo provides detailed information about a particular inline candidate. struct InlineInfo diff --git a/src/jit/inlinepolicy.cpp b/src/jit/inlinepolicy.cpp index 6de813eb78..31580aff9b 100644 --- a/src/jit/inlinepolicy.cpp +++ b/src/jit/inlinepolicy.cpp @@ -44,18 +44,6 @@ void LegacyPolicy::noteCandidate(InlineObservation obs) InlineImpact impact = inlGetImpact(obs); assert(impact == InlineImpact::INFORMATION); - switch (obs) - { - case InlineObservation::CALLEE_IS_FORCE_INLINE: - { - inlIsForceInline = true; - break; - } - - default: - break; - } - switch (inlDecision) { case InlineDecision::UNDECIDED: @@ -69,6 +57,9 @@ void LegacyPolicy::noteCandidate(InlineObservation obs) assert(!"Unexpected inlDecision"); unreached(); } + + // Now fall through to the general handling. + note(obs); } //------------------------------------------------------------------------ @@ -95,7 +86,55 @@ void LegacyPolicy::note(InlineObservation obs) // reported via noteFatal. 
assert(impact != InlineImpact::FATAL); - noteInternal(obs, impact); + // Handle most information here + bool isInformation = (impact == InlineImpact::INFORMATION); + bool propagate = !isInformation; + + if (isInformation) + { + switch (obs) + { + case InlineObservation::CALLEE_IS_FORCE_INLINE: + inlIsForceInline = true; + break; + case InlineObservation::CALLEE_IS_INSTANCE_CTOR: + inlIsInstanceCtor = true; + break; + case InlineObservation::CALLEE_CLASS_PROMOTABLE: + inlIsFromPromotableValueClass = true; + break; + case InlineObservation::CALLEE_HAS_SIMD: + inlHasSimd = true; + break; + case InlineObservation::CALLEE_LOOKS_LIKE_WRAPPER: + inlLooksLikeWrapperMethod = true; + break; + case InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST: + inlArgFeedsConstantTest = true; + break; + case InlineObservation::CALLEE_ARG_FEEDS_RANGE_CHECK: + inlArgFeedsRangeCheck = true; + break; + case InlineObservation::CALLEE_IS_MOSTLY_LOAD_STORE: + inlMethodIsMostlyLoadStore = true; + break; + case InlineObservation::CALLEE_HAS_SWITCH: + // Pass this one on, it should cause inlining to fail. + propagate = true; + break; + case InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST: + inlConstantFeedsConstantTest = true; + break; + default: + // Ignore the remainder for now + break; + } + } + + if (propagate) + { + noteInternal(obs); + } } //------------------------------------------------------------------------ @@ -106,13 +145,10 @@ void LegacyPolicy::note(InlineObservation obs) void LegacyPolicy::noteFatal(InlineObservation obs) { - // Check the impact - InlineImpact impact = inlGetImpact(obs); - // As a safeguard, all fatal impact must be // reported via noteFatal. 
- assert(impact == InlineImpact::FATAL); - noteInternal(obs, impact); + assert(inlGetImpact(obs) == InlineImpact::FATAL); + noteInternal(obs); assert(inlDecisionIsFailure(inlDecision)); } @@ -131,7 +167,7 @@ void LegacyPolicy::noteInt(InlineObservation obs, int value) { unsigned calleeMaxStack = static_cast<unsigned>(value); - if (calleeMaxStack > SMALL_STACK_SIZE) + if (!inlIsForceInline && (calleeMaxStack > SMALL_STACK_SIZE)) { setNever(InlineObservation::CALLEE_MAXSTACK_TOO_BIG); } @@ -159,7 +195,7 @@ void LegacyPolicy::noteInt(InlineObservation obs, int value) unsigned ilByteSize = static_cast<unsigned>(value); - if (ilByteSize > inlCompiler->getImpInlineSize()) + if (!inlIsForceInline && (ilByteSize > inlCompiler->getImpInlineSize())) { setNever(InlineObservation::CALLEE_TOO_MUCH_IL); } @@ -192,17 +228,11 @@ void LegacyPolicy::noteDouble(InlineObservation obs, double value) // // Arguments: // obs - the current obsevation -// impact - impact of the current observation -void LegacyPolicy::noteInternal(InlineObservation obs, InlineImpact impact) +void LegacyPolicy::noteInternal(InlineObservation obs) { - // Ignore INFORMATION for now, since policy - // is still embedded at the observation sites. - if (impact == InlineImpact::INFORMATION) - { - return; - } - + // Note any INFORMATION that reaches here will now cause failure. + // Non-fatal INFORMATION observations must be handled higher up. InlineTarget target = inlGetTarget(obs); if (target == InlineTarget::CALLEE) @@ -272,3 +302,75 @@ void LegacyPolicy::setNever(InlineObservation obs) unreached(); } } + +//------------------------------------------------------------------------ +// determineMultiplier: determine benefit multiplier for this inline +// +// Notes: uses the accumulated set of observations to compute a +// profitability boost for the inline candidate. 
+ +double LegacyPolicy::determineMultiplier() +{ + double multiplier = 0; + + // Bump up the multiplier for instance constructors + + if (inlIsInstanceCtor) + { + multiplier += 1.5; + JITDUMP("\nmultiplier in instance constructors increased to %g.", multiplier); + } + + // Bump up the multiplier for methods in promotable struct + + if (inlIsFromPromotableValueClass) + { + multiplier += 3; + JITDUMP("\nmultiplier in methods of promotable struct increased to %g.", multiplier); + } + +#ifdef FEATURE_SIMD + + if (inlHasSimd) + { + static ConfigDWORD fJitInlineSIMDMultiplier; + int simdMultiplier = fJitInlineSIMDMultiplier.val(CLRConfig::INTERNAL_JitInlineSIMDMultiplier); + + multiplier += simdMultiplier; + JITDUMP("\nInline candidate has SIMD type args, locals or return value. Multiplier increased to %g.", multiplier); + } + +#endif // FEATURE_SIMD + + if (inlLooksLikeWrapperMethod) + { + multiplier += 1.0; + JITDUMP("\nInline candidate looks like a wrapper method. Multiplier increased to %g.", multiplier); + } + + if (inlArgFeedsConstantTest) + { + multiplier += 1.0; + JITDUMP("\nInline candidate has an arg that feeds a constant test. Multiplier increased to %g.", multiplier); + } + + if (inlMethodIsMostlyLoadStore) + { + multiplier += 3.0; + JITDUMP("\nInline candidate is mostly loads and stores. Multiplier increased to %g.", multiplier); + } + + if (inlArgFeedsRangeCheck) + { + multiplier += 0.5; + JITDUMP("\nInline candidate has arg that feeds range check. Multiplier increased to %g.", multiplier); + } + + if (inlConstantFeedsConstantTest) + { + multiplier += 3.0; + JITDUMP("\nInline candidate has const arg that feeds a conditional. 
Multiplier increased to %g.", multiplier); + } + + return multiplier; +} diff --git a/src/jit/inlinepolicy.h b/src/jit/inlinepolicy.h index 746b1e6b57..42047d1a72 100644 --- a/src/jit/inlinepolicy.h +++ b/src/jit/inlinepolicy.h @@ -40,6 +40,14 @@ public: : InlinePolicy() , inlCompiler(compiler) , inlIsForceInline(false) + , inlIsInstanceCtor(false) + , inlIsFromPromotableValueClass(false) + , inlHasSimd(false) + , inlLooksLikeWrapperMethod(false) + , inlArgFeedsConstantTest(false) + , inlMethodIsMostlyLoadStore(false) + , inlArgFeedsRangeCheck(false) + , inlConstantFeedsConstantTest(false) { // empty } @@ -52,7 +60,10 @@ public: void noteInt(InlineObservation obs, int value) override; void noteDouble(InlineObservation obs, double value) override; - // Policy decisions + // Policy determinations + double determineMultiplier() override; + + // Policy policies bool propagateNeverToRuntime() const override { return true; } #ifdef DEBUG @@ -62,7 +73,7 @@ public: private: // Helper methods - void noteInternal(InlineObservation obs, InlineImpact impact); + void noteInternal(InlineObservation obs); void setFailure(InlineObservation obs); void setNever(InlineObservation obs); @@ -78,7 +89,15 @@ private: // Data members Compiler* inlCompiler; - bool inlIsForceInline; + bool inlIsForceInline :1; + bool inlIsInstanceCtor :1; + bool inlIsFromPromotableValueClass :1; + bool inlHasSimd :1; + bool inlLooksLikeWrapperMethod :1; + bool inlArgFeedsConstantTest :1; + bool inlMethodIsMostlyLoadStore :1; + bool inlArgFeedsRangeCheck :1; + bool inlConstantFeedsConstantTest :1; }; #endif // _INLINE_POLICY_H_ diff --git a/src/jit/jit.h b/src/jit/jit.h index af78504aa0..19a4f332d8 100644 --- a/src/jit/jit.h +++ b/src/jit/jit.h @@ -634,25 +634,22 @@ size_t unsigned_abs(ssize_t x) #if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE -class histo +class Histogram { public: - histo(IAllocator* alloc, unsigned* sizeTab, unsigned sizeCnt = 0); - ~histo(); 
+ Histogram(IAllocator* allocator, const unsigned* const sizeTable); + ~Histogram(); - void histoClr(); - void histoDsp(FILE* fout); - void histoRec(unsigned __int64 siz, unsigned cnt); - void histoRec(unsigned siz, unsigned cnt); + void dump(FILE* output); + void record(unsigned size); private: + void ensureAllocated(); - void histoEnsureAllocated(); - - IAllocator* histoAlloc; - unsigned histoSizCnt; - unsigned* histoSizTab; - unsigned* histoCounts; + IAllocator* m_allocator; + unsigned m_sizeCount; + const unsigned* const m_sizeTable; + unsigned* m_counts; }; #endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets index dfbe8f85f9..b95852f63c 100644 --- a/src/jit/jit.settings.targets +++ b/src/jit/jit.settings.targets @@ -82,6 +82,7 @@ <CppCompile Include="..\LoopCloning.cpp" /> <CppCompile Include="..\inline.cpp" /> <CppCompile Include="..\inlinepolicy.cpp" /> + <CppCompile Include="..\hostallocator.cpp" /> <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\CodeGenLegacy.cpp" /> <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\Lower.cpp" /> <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LSRA.cpp" /> diff --git a/src/jit/jitstd/unordered_set.h b/src/jit/jitstd/unordered_set.h index 9f42518679..388e72426c 100644 --- a/src/jit/jitstd/unordered_set.h +++ b/src/jit/jitstd/unordered_set.h @@ -149,7 +149,8 @@ template <typename Value, typename Hash, typename Pred, typename Alloc> unordered_set<Value, Hash, Pred, Alloc>& unordered_set<Value, Hash, Pred, Alloc>::operator=(unordered_set const& other) { - return base_type::operator=(other); + base_type::operator=(other); + return *this; } } // end of namespace jitstd. 
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index b40189574f..14ac29cbf5 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -428,7 +428,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo) noway_assert(varDscInfo->intRegArgNum == 0); varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet()); -#if FEATURE_MULTIREG__ARGS +#if FEATURE_MULTIREG_ARGS varDsc->lvOtherArgReg = REG_NA; #endif varDsc->setPrefReg(varDsc->lvArgReg, this); diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp index 294b9c2d68..87bc04ecab 100644 --- a/src/jit/lowerarm64.cpp +++ b/src/jit/lowerarm64.cpp @@ -347,12 +347,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) break; case GT_MUL: - if ((tree->gtFlags & GTF_UNSIGNED) != 0) - { - // unsigned mul should only need one register - info->internalIntCount = 1; - } - else if (tree->gtOverflow()) + if (tree->gtOverflow()) { // Need a register different from target reg to check // for signed overflow. diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 9fbf0a9550..a798e60d5f 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -2419,6 +2419,7 @@ LinearScan::getKillSetForNode(GenTree* tree) killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY); break; #ifdef _TARGET_AMD64_ + case GenTreeBlkOp::BlkOpKindRepInstr: // rep movs kills RCX, RDI and RSI killMask = RBM_RCX | RBM_RDI | RBM_RSI; @@ -2538,6 +2539,33 @@ LinearScan::getKillSetForNode(GenTree* tree) break; #endif // PROFILING_SUPPORTED && _TARGET_AMD64_ +#if FEATURE_MULTIREG_ARGS +#ifdef _TARGET_ARM64_ + case GT_PUTARG_REG: + // TODO-Cleanup: Remove this code after Issue #3524 is complete + // + // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 + // We actually write a second register that isn't being properly tracked + // We can prevent anyone else from being alive at this point by adding + // an extra RefTypeKill for the second register. 
+ // + if (tree->TypeGet() == TYP_STRUCT) + { + TreeNodeInfo info = tree->gtLsraInfo; + regMaskTP dstMask = info.getDstCandidates(this); + + // Make sure that the dstMask represents two consecutive registers + regMaskTP lowRegBit = genFindLowestBit(dstMask); + regMaskTP nextRegBit = lowRegBit << 1; + regMaskTP regPairMask = (lowRegBit | nextRegBit); + + assert(dstMask == regPairMask); + + killMask = nextRegBit; // setup killMask to be the mask for the second register. + } +#endif // _TARGET_ARM64_ +#endif // FEATURE_MULTIREG_ARGS + default: // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE break; @@ -4537,6 +4565,8 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi } #if FEATURE_MULTIREG_ARGS #ifdef _TARGET_ARM64_ + // TODO-Cleanup: Remove this code after Issue #3524 is complete + // // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 if (regType == TYP_STRUCT) { @@ -4897,7 +4927,12 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition) if (assignedInterval == nullptr) { RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition(); + +#ifndef _TARGET_ARM64_ + // TODO-Cleanup: Revisit this after Issue #3524 is complete + // On ARM64 the nodeLocation is not always == refLocation, Disabling this assert for now. 
assert(nextPhysRegPosition->nodeLocation == refLocation && candidateBit != candidates); +#endif continue; } @@ -5080,6 +5115,7 @@ void LinearScan::assignPhysReg( RegRecord * regRec, Interval * interval) #if FEATURE_MULTIREG_ARGS_OR_RET #ifdef _TARGET_ARM64_ + // TODO-Cleanup: Remove this code after Issue #3524 is complete // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 if (interval->registerType == TYP_STRUCT) { @@ -5256,6 +5292,7 @@ void LinearScan::unassignPhysReg( RegRecord * regRec, RefPosition* spillRefPosit #if FEATURE_MULTIREG_ARGS_OR_RET #ifdef _TARGET_ARM64_ + // TODO-Cleanup: Remove this code after Issue #3524 is complete // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 if (assignedInterval->registerType == TYP_STRUCT) { @@ -6103,6 +6140,7 @@ LinearScan::allocateRegisters() // Identify the special cases where we decide up-front not to allocate bool allocate = true; + bool didDump = false; if (refType == RefTypeParamDef || refType == RefTypeZeroInit) { @@ -6114,6 +6152,7 @@ LinearScan::allocateRegisters() if (refType == RefTypeParamDef && varDsc->lvRefCntWtd <= BB_UNITY_WEIGHT) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); + didDump = true; allocate = false; } // If it has no actual references, mark it as "lastUse"; since they're not actually part @@ -6142,9 +6181,10 @@ LinearScan::allocateRegisters() { unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition); } - else + else if (!didDump) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + didDump = true; } currentRefPosition->registerAssignment = RBM_NONE; continue; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 954339e910..13c7398b99 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -2066,12 +2066,12 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum #if FEATURE_MULTIREG_ARGS #ifdef _TARGET_ARM64_ assert(varTypeIsStruct(type)); - if (structSize <= MAX_PASS_MULTIREG_BYTES) 
+ if (varDsc->lvIsMultiregStruct()) { - assert(structSize > TARGET_POINTER_SIZE); // structSize must be 9..16 // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD); // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works. - // Pass by value in two registers + // Create a GT_LDOBJ for the argument + // This will be passed by value in two registers arg = gtNewOperNode(GT_ADDR, TYP_BYREF, arg); addrNode = arg; @@ -4170,7 +4170,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen // Update the flags call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); } -#endif // FEATURE_MULTIREG_ARGS +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // Make a copy of a struct variable if necessary, to pass to a callee. // returns: tree that computes address of the outgoing arg @@ -15255,15 +15255,13 @@ void Compiler::fgPromoteStructs() #endif // _TARGET_AMD64_ || _TARGET_ARM64_ #if FEATURE_MULTIREG_ARGS #if defined(_TARGET_ARM64_) - // TODO-PERF - Only do this when the LclVar is used in an argument context // - // For now we currently don't promote structs that can be passed in registers + // For now we currently don't promote structs that could be passed in registers // - unsigned structSize = lvaLclExactSize(lclNum); - if ((structSize > TARGET_POINTER_SIZE) && (structSize <= MAX_PASS_MULTIREG_BYTES)) + if (varDsc->lvIsMultiregStruct()) { JITDUMP("Not promoting promotable struct local V%02u (size==%d): ", - lclNum, structSize); + lclNum, lvaLclExactSize(lclNum)); continue; } #endif // _TARGET_ARM64_ diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index ca4c43fcfa..6c2d579521 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -1855,7 +1855,7 @@ void Compiler::optFindNaturalLoops() loopsThisMethod++; /* keep track of the number of exits */ - loopExitCountTable.histoRec((unsigned)exitCount, 1); + loopExitCountTable.record(static_cast<unsigned>(exitCount)); #endif // COUNT_LOOPS } @@ -1865,7 +1865,7 @@ NO_LOOP: ; } #if 
COUNT_LOOPS - loopCountTable.histoRec(loopsThisMethod, 1); + loopCountTable.record(loopsThisMethod); if (maxLoopsPerMethod < loopsThisMethod) { maxLoopsPerMethod = loopsThisMethod; diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp index da48a87256..cb32378bbb 100644 --- a/src/jit/regalloc.cpp +++ b/src/jit/regalloc.cpp @@ -679,6 +679,8 @@ regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *ar #ifdef _TARGET_ARM64_ if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) { + assert(argDsc->lvIsMultiregStruct()); + regNumber secondArgReg = argDsc->lvOtherArgReg; noway_assert(regState->rsIsFloat == false); diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp index f2539cad0c..2688a0111e 100644 --- a/src/jit/utils.cpp +++ b/src/jit/utils.cpp @@ -778,109 +778,92 @@ void ConfigMethodRange::initRanges(__in_z LPCWSTR rangeStr) * Histogram class. */ -histo::histo(IAllocator* alloc, unsigned * sizeTab, unsigned sizeCnt) : - histoAlloc(alloc), - histoCounts(nullptr) +Histogram::Histogram(IAllocator* allocator, const unsigned* const sizeTable) + : m_allocator(allocator) + , m_sizeTable(sizeTable) + , m_counts(nullptr) { - if (sizeCnt == 0) + unsigned sizeCount = 0; + do { - do - { - sizeCnt++; - } - while ((sizeTab[sizeCnt] != 0) && (sizeCnt < 1000)); + sizeCount++; } + while ((sizeTable[sizeCount] != 0) && (sizeCount < 1000)); - histoSizCnt = sizeCnt; - histoSizTab = sizeTab; + m_sizeCount = sizeCount; } -histo::~histo() +Histogram::~Histogram() { - histoAlloc->Free(histoCounts); + m_allocator->Free(m_counts); } -// We need to lazy allocate the histogram data so static "histo" variables don't try to call the CLR memory allocator -// in the loader lock, which doesn't work. -void histo::histoEnsureAllocated() +// We need to lazy allocate the histogram data so static `Histogram` variables don't try to +// call the host memory allocator in the loader lock, which doesn't work. 
+void Histogram::ensureAllocated() { - if (histoCounts == nullptr) + if (m_counts == nullptr) { - histoCounts = new (histoAlloc) unsigned[histoSizCnt + 1]; - histoClr(); + m_counts = new (m_allocator) unsigned[m_sizeCount + 1]; + memset(m_counts, 0, (m_sizeCount + 1) * sizeof(*m_counts)); } } -void histo::histoClr() +void Histogram::dump(FILE* output) { - histoEnsureAllocated(); - memset(histoCounts, 0, (histoSizCnt + 1) * sizeof(*histoCounts)); -} + ensureAllocated(); -void histo::histoDsp(FILE* fout) -{ - histoEnsureAllocated(); - - unsigned i; - unsigned c; - unsigned t; - - for (i = t = 0; i <= histoSizCnt; i++) + unsigned t = 0; + for (unsigned i = 0; i < m_sizeCount; i++) { - t += histoCounts[i]; + t += m_counts[i]; } - for (i = c = 0; i <= histoSizCnt; i++) + for (unsigned c = 0, i = 0; i <= m_sizeCount; i++) { - if (i == histoSizCnt) + if (i == m_sizeCount) { - if (!histoCounts[i]) + if (m_counts[i] == 0) + { break; + } - fprintf(fout, " > %7u", histoSizTab[i-1]); + fprintf(output, " > %7u", m_sizeTable[i - 1]); } else { if (i == 0) { - fprintf(fout, " <= "); + fprintf(output, " <= "); } else { - fprintf(fout, "%7u .. ", histoSizTab[i-1]+1); + fprintf(output, "%7u .. 
", m_sizeTable[i - 1] + 1); } - fprintf(fout, "%7u", histoSizTab[i]); + fprintf(output, "%7u", m_sizeTable[i]); } - c += histoCounts[i]; + c += m_counts[i]; - fprintf(fout, " ===> %7u count (%3u%% of total)\n", histoCounts[i], (int)(100.0 * c / t)); + fprintf(output, " ===> %7u count (%3u%% of total)\n", m_counts[i], (int)(100.0 * c / t)); } } -void histo::histoRec(unsigned __int64 siz, unsigned cnt) -{ - assert(FitsIn<unsigned>(siz)); - histoRec((unsigned)siz, cnt); -} - -void histo::histoRec(unsigned siz, unsigned cnt) +void Histogram::record(unsigned size) { - histoEnsureAllocated(); + ensureAllocated(); - unsigned i; - unsigned * t; - - for (i = 0, t = histoSizTab; - i < histoSizCnt; - i++ , t++) + unsigned i; + for (i = 0; i < m_sizeCount; i++) { - if (*t >= siz) + if (m_sizeTable[i] >= size) + { break; + } } - histoCounts[i] += cnt; + m_counts[i]++; } #endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE |