Diffstat (limited to 'src/jit/compiler.h')
-rw-r--r-- | src/jit/compiler.h | 297 |
1 file changed, 201 insertions, 96 deletions
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 05047c5ecb..d8cd491063 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -691,12 +691,21 @@ public:
         // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree.
         assert(varTypeIsStruct(lvType) || (lvType == TYP_BLK) || (lvPromoted && lvUnusedStruct));
+
+#if defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
+        // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do
+        // this for arguments, which must be passed according to the defined ABI.
+        if ((lvType == TYP_SIMD12) && !lvIsParam)
+        {
+            assert(lvExactSize == 12);
+            return 16;
+        }
+#endif // defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
+
         return (unsigned)(roundUp(lvExactSize, TARGET_POINTER_SIZE));
     }

-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
     unsigned lvSlotNum; // original slot # (if remapped)
-#endif

     typeInfo lvVerTypeInfo; // type info needed for verification

@@ -926,6 +935,14 @@
 extern const char*   PhaseNames[];
 extern const char*   PhaseEnums[];
 extern const LPCWSTR PhaseShortNames[];

+// The following enum provides a simple 1:1 mapping to CLR APIs
+enum API_ICorJitInfo_Names
+{
+#define DEF_CLR_API(name) API_##name,
+#include "ICorJitInfo_API_names.h"
+    API_COUNT
+};
+
 //---------------------------------------------------------------
 // Compilation time.
 //
@@ -949,6 +966,10 @@ struct CompTimeInfo
     unsigned __int64 m_totalCycles;
     unsigned __int64 m_invokesByPhase[PHASE_NUMBER_OF];
     unsigned __int64 m_cyclesByPhase[PHASE_NUMBER_OF];
+#if MEASURE_CLRAPI_CALLS
+    unsigned __int64 m_CLRinvokesByPhase[PHASE_NUMBER_OF];
+    unsigned __int64 m_CLRcyclesByPhase[PHASE_NUMBER_OF];
+#endif

     // For better documentation, we call EndPhase on
     // non-leaf phases. We should also call EndPhase on the
     // last leaf subphase; obviously, the elapsed cycles between the EndPhase
@@ -960,12 +981,25 @@ struct CompTimeInfo
     unsigned __int64 m_parentPhaseEndSlop;
     bool             m_timerFailure;

+#if MEASURE_CLRAPI_CALLS
+    // The following measures the time spent inside each individual CLR API call.
+    unsigned         m_allClrAPIcalls;
+    unsigned         m_perClrAPIcalls[API_ICorJitInfo_Names::API_COUNT];
+    unsigned __int64 m_allClrAPIcycles;
+    unsigned __int64 m_perClrAPIcycles[API_ICorJitInfo_Names::API_COUNT];
+    unsigned __int32 m_maxClrAPIcycles[API_ICorJitInfo_Names::API_COUNT];
+#endif // MEASURE_CLRAPI_CALLS
+
     CompTimeInfo(unsigned byteCodeBytes);
 #endif
 };

 #ifdef FEATURE_JIT_METHOD_PERF

+#if MEASURE_CLRAPI_CALLS
+struct WrapICorJitInfo;
+#endif
+
 // This class summarizes the JIT time information over the course of a run: the number of methods compiled,
 // and the total and maximum timings. (These are instances of the "CompTimeInfo" type described above).
 // The operation of adding a single method's timing to the summary may be performed concurrently by several
@@ -977,6 +1011,7 @@ class CompTimeSummaryInfo
     static CritSecObject s_compTimeSummaryLock;

     int          m_numMethods;
+    int          m_totMethods;
     CompTimeInfo m_total;
     CompTimeInfo m_maximum;
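A note on the API_ICorJitInfo_Names enum added above: it is an X-macro construction, where every DEF_CLR_API(name) entry pulled in from ICorJitInfo_API_names.h expands to one enumerator, and the trailing API_COUNT becomes the natural bound for the per-API counter arrays added to CompTimeInfo further down. A minimal, self-contained sketch of the pattern — the two API names are hypothetical stand-ins, not the real contents of ICorJitInfo_API_names.h:

    #include <cstdio>

    // In the real header the DEF_CLR_API(...) lines live in
    // ICorJitInfo_API_names.h and are pulled in by an #include;
    // the two names below are hypothetical stand-ins.
    enum API_Names
    {
    #define DEF_CLR_API(name) API_##name,
        DEF_CLR_API(getMethodClass)
        DEF_CLR_API(getClassSize)
    #undef DEF_CLR_API
        API_COUNT // doubles as the size of the per-API counter arrays
    };

    int main()
    {
        unsigned perApiCalls[API_COUNT] = {}; // cf. m_perClrAPIcalls above
        perApiCalls[API_getMethodClass]++;
        printf("tracking %d APIs\n", (int)API_COUNT);
        return 0;
    }

Keeping the list in a single header means the enum, the counter arrays, and any table of API name strings can never drift out of sync.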
@@ -996,13 +1031,14 @@ public:
     // This is the unique CompTimeSummaryInfo object for this instance of the runtime.
     static CompTimeSummaryInfo s_compTimeSummary;

-    CompTimeSummaryInfo() : m_numMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0)
+    CompTimeSummaryInfo()
+        : m_numMethods(0), m_totMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0)
     {
     }

     // Assumes that "info" is a completed CompTimeInfo for a compilation; adds it to the summary.
     // This is thread safe.
-    void AddInfo(CompTimeInfo& info);
+    void AddInfo(CompTimeInfo& info, bool includePhases);

     // Print the summary information to "f".
     // This is not thread-safe; assumed to be called by only one thread.
@@ -1017,6 +1053,13 @@ class JitTimer
 {
     unsigned __int64 m_start;         // Start of the compilation.
     unsigned __int64 m_curPhaseStart; // Start of the current phase.
+#if MEASURE_CLRAPI_CALLS
+    unsigned __int64 m_CLRcallStart;   // Start of the current CLR API call (if any).
+    unsigned __int64 m_CLRcallInvokes; // CLR API invokes under current outer so far.
+    unsigned __int64 m_CLRcallCycles;  // CLR API cycles under current outer so far.
+    int              m_CLRcallAPInum;  // The enum/index of the current CLR API call (or -1).
+    static double    s_cyclesPerSec;   // Cached for speedier measurements.
+#endif
 #ifdef DEBUG
     Phases m_lastPhase; // The last phase that was completed (or (Phases)-1 to start).
 #endif
@@ -1045,9 +1088,15 @@ public:
     // Ends the current phase (argument is for a redundant check).
     void EndPhase(Phases phase);

+#if MEASURE_CLRAPI_CALLS
+    // Start and end a timed CLR API call.
+    void CLRApiCallEnter(unsigned apix);
+    void CLRApiCallLeave(unsigned apix);
+#endif // MEASURE_CLRAPI_CALLS
+
     // Completes the timing of the current method, which is assumed to have "byteCodeBytes" bytes of bytecode,
     // and adds it to "sum".
-    void Terminate(Compiler* comp, CompTimeSummaryInfo& sum);
+    void Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases);

     // Attempts to query the cycle counter of the current thread. If successful, returns "true" and sets
     // *cycles to the cycle counter value. Otherwise, returns false and sets the "m_timerFailure" flag of
@@ -1164,7 +1213,13 @@ struct fgArgTabEntry
     regNumber otherRegNum; // The (second) register to use when passing this argument.

     SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#elif defined(_TARGET_X86_)
+    __declspec(property(get = getIsStruct)) bool isStruct;
+    bool getIsStruct()
+    {
+        return varTypeIsStruct(node);
+    }
+#endif // _TARGET_X86_

 #ifdef _TARGET_ARM_
     void SetIsHfaRegArg(bool hfaRegArg)
@@ -1293,6 +1348,10 @@ public:
     {
         return hasStackArgs;
     }
+    bool AreArgsComplete() const
+    {
+        return argsComplete;
+    }
 };

 #ifdef DEBUG
@@ -1939,8 +1998,6 @@ public:
     GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2);
     GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3);

-    GenTreeArgList* gtNewAggregate(GenTree* element);
-
     static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreePtr call, unsigned argNum);
     static fgArgTabEntryPtr gtArgEntryByNode(GenTreePtr call, GenTreePtr node);
     fgArgTabEntryPtr gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx);
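Regarding the x86 fgArgTabEntry change above: __declspec(property) is a Microsoft C++ extension that routes field-syntax reads through a getter, so `entry->isStruct` call sites keep compiling even though the value is now computed from the node. A small sketch of the mechanism under that assumption (the struct and its boolean field are illustrative; the real getter calls varTypeIsStruct(node)):

    struct ArgEntry
    {
        bool nodeIsStruct; // stand-in for interrogating the GenTree node

        // MSVC extension: every read of `isStruct` is compiled into a call
        // to getIsStruct(), giving field syntax with getter semantics.
        __declspec(property(get = getIsStruct)) bool isStruct;
        bool getIsStruct()
        {
            return nodeIsStruct; // the real code returns varTypeIsStruct(node)
        }
    };

    bool usesStackSlot(ArgEntry& e)
    {
        return e.isStruct; // expands to e.getIsStruct()
    }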
@@ -1975,7 +2032,18 @@ public:

     GenTreePtr gtClone(GenTree* tree, bool complexOK = false);

-    GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0);
+    // If `tree` is a lclVar with lclNum `varNum`, return an IntCns with value `varVal`; otherwise,
+    // create a copy of `tree`, adding specified flags, replacing uses of lclVar `deepVarNum` with
+    // IntCnses with value `deepVarVal`.
+    GenTreePtr gtCloneExpr(
+        GenTree* tree, unsigned addFlags, unsigned varNum, int varVal, unsigned deepVarNum, int deepVarVal);
+
+    // Create a copy of `tree`, optionally adding specified flags, and optionally mapping uses of local
+    // `varNum` to int constants with value `varVal`.
+    GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0)
+    {
+        return gtCloneExpr(tree, addFlags, varNum, varVal, varNum, varVal);
+    }

     GenTreePtr gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr replacementTree);
@@ -1997,7 +2065,7 @@ public:

     unsigned gtHashValue(GenTree* tree);

-    unsigned gtSetListOrder(GenTree* list, bool regs);
+    unsigned gtSetListOrder(GenTree* list, bool regs, bool isListCallArgs);

     void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* adr, bool constOnly);
@@ -2277,7 +2345,8 @@ public:
        DNER_VMNeedsStackAddr,
        DNER_LiveInOutOfHandler,
        DNER_LiveAcrossUnmanagedCall,
-        DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address.
+        DNER_BlockOp,     // Is read or written via a block operation that explicitly takes the address.
+        DNER_IsStructArg, // Is a struct passed as an argument in a way that requires a stack location.
 #ifdef JIT32_GCENCODER
        DNER_PinningRef,
 #endif
@@ -2439,7 +2508,6 @@ public:

     void lvaInit();

-    unsigned lvaArgSize(const void* argTok);
     unsigned lvaLclSize(unsigned varNum);
     unsigned lvaLclExactSize(unsigned varNum);
@@ -2712,9 +2780,10 @@ protected:

     void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);

-    bool impCanPInvokeInline(var_types callRetTyp);
-    bool impCanPInvokeInlineCallSite(var_types callRetTyp);
-    void impCheckForPInvokeCall(GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags);
+    bool impCanPInvokeInline(BasicBlock* block);
+    bool impCanPInvokeInlineCallSite(BasicBlock* block);
+    void impCheckForPInvokeCall(
+        GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block);

     GenTreePtr impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset = BAD_IL_OFFSET);
     void impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig);
@@ -2739,8 +2808,6 @@ protected:

     GenTreePtr impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd);

-    GenTreePtr impInitCallLongReturn(GenTreePtr call);
-
     GenTreePtr impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd);

 #ifdef DEBUG
@@ -2764,7 +2831,6 @@ protected:
     void impImportLeave(BasicBlock* block);
     void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr);
-    BOOL impLocAllocOnStack();
     GenTreePtr impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
                             CORINFO_METHOD_HANDLE method,
                             CORINFO_SIG_INFO* sig,
@@ -2868,6 +2934,8 @@ public:
                            unsigned flags,
                            void* compileTimeHandle);

+    GenTreePtr getRuntimeContextTree(CORINFO_RUNTIME_LOOKUP_KIND kind);
+
     GenTreePtr impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                       CORINFO_LOOKUP* pLookup,
                                       void* compileTimeHandle);
@@ -3148,8 +3216,6 @@ private:
     static LONG jitNestingLevel;
 #endif // DEBUG

-    bool seenConditionalJump;
-
     static BOOL impIsAddressInLocal(GenTreePtr tree, GenTreePtr* lclVarTreeOut);
     void impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult);
@@ -3455,8 +3521,9 @@ public:
     void fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw);
     void fgMorphBlocks();

-    bool fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg));
+    bool fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg));

+    void fgCheckArgCnt();
     void fgSetOptions();

 #ifdef DEBUG
@@ -3845,7 +3912,7 @@ public:
     //
     var_types getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
                                      structPassingKind* wbPassStruct = nullptr,
-                                     unsigned           structSize = 0);
+                                     unsigned structSize = 0);

 #ifdef DEBUG
     // Print a representation of "vnp" or "vn" on standard output.
@@ -4072,7 +4139,7 @@ public:

     void fgUnreachableBlock(BasicBlock* block);

-    void fgRemoveJTrue(BasicBlock* block);
+    void fgRemoveConditionalJump(BasicBlock* block);

     BasicBlock* fgLastBBInMainFunction();
@@ -4204,6 +4271,7 @@ public:
     void fgDebugCheckLinks(bool morphTrees = false);
     void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt);
     void fgDebugCheckFlags(GenTreePtr tree);
+    void fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags);
 #endif

 #ifdef LEGACY_BACKEND
@@ -4305,7 +4373,7 @@ protected:

     void fgLinkBasicBlocks();

-    void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
+    unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);

     void fgCheckBasicBlockControlFlow();
@@ -4380,13 +4448,6 @@ private:
     GenTree* fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr);
     GenTree* fgMakeMultiUse(GenTree** ppTree);

-    // After replacing oldChild with newChild, fixup the fgArgTabEntryPtr
-    // if it happens to be an argument to a call.
-    void fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild);
-
-public:
-    void fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg);
-
 private:
     // Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node.
     GenTreePtr fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree);
@@ -4440,16 +4501,11 @@ private:
     // for sufficiently small offsets, we can rely on OS page protection to implicitly null-check addresses that we
     // know will be dereferenced. To know that reliance on implicit null checking is sound, we must further know that
     // all offsets between the top-level indirection and the bottom are constant, and that their sum is sufficiently
-    // small; hence the other fields of MorphAddrContext. Finally, the odd structure of GT_COPYBLK, in which the second
-    // argument is a GT_LIST, requires us to "tell" that List node that its parent is a GT_COPYBLK, so it "knows" that
-    // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes
-    // representing method call argument lists.)
+    // small; hence the other fields of MorphAddrContext.
     enum MorphAddrContextKind
     {
         MACK_Ind,
         MACK_Addr,
-        MACK_CopyBlock, // This is necessary so we know we have to start a new "Ind" context for each of the
-                        // addresses in the arg list.
     };
     struct MorphAddrContext
     {
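The shortened MorphAddrContext comment above still rests on the implicit null-check argument: as long as every offset folded between a possibly-null base and the final indirection is a small constant, the access falls inside the OS-protected page at address zero and faults like an explicit check would. A sketch of the resulting bound test — the page-size constant is an assumption for illustration; the JIT's actual limit is the compMaxUncheckedOffsetForNullObject member that appears near the end of this header:

    #include <cstddef>
    #include <cstdint>

    // Assumption for illustration: offsets smaller than one 4K page keep a
    // null-based access inside the unmapped page at address 0, so the
    // hardware fault still surfaces as a null-reference exception.
    const std::size_t kMaxUncheckedOffsetForNullObject = 0x1000;

    bool canRelyOnImplicitNullCheck(std::uint64_t sumOfConstOffsets)
    {
        return sumOfConstOffsets < kMaxUncheckedOffsetForNullObject;
    }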
@@ -4513,7 +4569,7 @@ private:
     void fgMorphCallInline(GenTreeCall* call, InlineResult* result);
     void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result);
 #if DEBUG
-    void fgNoteNonInlineCandidate(GenTreePtr tree, GenTreeCall* call);
+    void fgNoteNonInlineCandidate(GenTreeStmt* stmt, GenTreeCall* call);
     static fgWalkPreFn fgFindNonInlineCandidate;
 #endif
     GenTreePtr fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd);
@@ -4525,16 +4581,14 @@ private:
     GenTreePtr fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue = false);
     GenTreePtr fgMorphBlkNode(GenTreePtr tree, bool isDest);
     GenTreePtr fgMorphBlockOperand(GenTreePtr tree, var_types asgType, unsigned blockWidth, bool isDest);
+    void fgMorphUnsafeBlk(GenTreeObj* obj);
     GenTreePtr fgMorphCopyBlock(GenTreePtr tree);
     GenTreePtr fgMorphForRegisterFP(GenTreePtr tree);
     GenTreePtr fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac = nullptr);
     GenTreePtr fgMorphSmpOpPre(GenTreePtr tree);
-    GenTreePtr fgMorphDivByConst(GenTreeOp* tree);
-    GenTreePtr fgMorphModByConst(GenTreeOp* tree);
     GenTreePtr fgMorphModToSubMulDiv(GenTreeOp* tree);
     GenTreePtr fgMorphSmpOpOptional(GenTreeOp* tree);
     GenTreePtr fgMorphRecognizeBoxNullable(GenTree* compare);
-    bool fgShouldUseMagicNumberDivide(GenTreeOp* tree);

     GenTreePtr fgMorphToEmulatedFP(GenTreePtr tree);
     GenTreePtr fgMorphConst(GenTreePtr tree);
@@ -4544,11 +4598,12 @@ public:

 private:
 #if LOCAL_ASSERTION_PROP
+    void fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree));
     void fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree));
 #endif
     void fgMorphTreeDone(GenTreePtr tree, GenTreePtr oldTree = nullptr DEBUGARG(int morphNum = 0));

-    GenTreePtr fgMorphStmt;
+    GenTreeStmt* fgMorphStmt;

     unsigned fgGetBigOffsetMorphingTemp(var_types type); // We cache one temp per type to be
                                                          // used when morphing big offset.
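With fgMorphDivByConst, fgMorphModByConst, and the magic-number heuristic removed above, fgMorphModToSubMulDiv remains as the mod-lowering path; it rests on the truncating-division identity a % b == a - (a / b) * b. A worked check of that identity in plain C++ (not the JIT's tree rewrite itself; b must be nonzero and a / b must not overflow, e.g. INT_MIN / -1):

    #include <cassert>

    // For truncating integer division (C/C++ and CIL `rem` semantics):
    //     a % b  ==  a - (a / b) * b
    int modViaSubMulDiv(int a, int b)
    {
        return a - (a / b) * b;
    }

    int main()
    {
        assert(modViaSubMulDiv(7, 3) == 1);
        assert(modViaSubMulDiv(-7, 3) == -1); // sign follows the dividend
        assert(modViaSubMulDiv(7, -3) == 1);
        return 0;
    }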
@@ -4564,7 +4619,6 @@ private:

     void fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar = nullptr);

-#ifdef DEBUGGING_SUPPORT
     void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
     void fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
@@ -4578,8 +4632,6 @@ private:
     void fgDispDebugScopes();
 #endif // DEBUG

-#endif // DEBUGGING_SUPPORT
-
     //-------------------------------------------------------------------------
     //
     // The following keeps track of any code we've added for things like array
@@ -4622,6 +4674,7 @@ private:
     void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result);
     void fgInsertInlineeBlocks(InlineInfo* pInlineInfo);
     GenTreePtr fgInlinePrependStatements(InlineInfo* inlineInfo);
+    void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, GenTreePtr stmt);

 #if FEATURE_MULTIREG_RET
     GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree);
@@ -4905,6 +4958,7 @@ public:
 #define LPFLG_VAR_LIMIT 0x0100    // iterator is compared with a local var (var # found in lpVarLimit)
 #define LPFLG_CONST_LIMIT 0x0200  // iterator is compared with a constant (found in lpConstLimit)
 #define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit)
+#define LPFLG_SIMD_LIMIT 0x0080   // iterator is compared with Vector<T>.Count (found in lpConstLimit)

 #define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop
 #define LPFLG_REMOVED 0x1000     // has been removed from the loop table (unrolled or optimized away)
@@ -5205,6 +5259,11 @@ protected:

     static const int MIN_CSE_COST = 2;

+    // Keeps tracked cse indices
+    BitVecTraits* cseTraits;
+    EXPSET_TP     cseFull;
+    EXPSET_TP     cseEmpty;
+
     /* Generic list of nodes - used by the CSE logic */

     struct treeLst
@@ -6237,7 +6296,7 @@ public:

     BOOL eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd);

-#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(TRACK_LSRA_STATS)

     bool IsSuperPMIException(unsigned code)
     {
@@ -6334,10 +6393,19 @@ public:
 #endif
     }

+    inline bool IsTargetAbi(CORINFO_RUNTIME_ABI abi)
+    {
+#if COR_JIT_EE_VERSION > 460
+        return eeGetEEInfo()->targetAbi == abi;
+#else
+        return CORINFO_DESKTOP_ABI == abi;
+#endif
+    }
+
     inline bool generateCFIUnwindCodes()
     {
-#if COR_JIT_EE_VERSION > 460 && defined(UNIX_AMD64_ABI)
-        return eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI;
+#ifdef UNIX_AMD64_ABI
+        return IsTargetAbi(CORINFO_CORERT_ABI);
 #else
         return false;
 #endif
@@ -6522,8 +6590,6 @@ private:
 public:
     CodeGenInterface* codeGen;

-#ifdef DEBUGGING_SUPPORT
-
     // The following holds information about instr offsets in terms of generated code.

     struct IPmappingDsc
@@ -6553,7 +6619,6 @@ public:
     typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, IL_OFFSETX, JitSimplerHashBehavior>
         CallSiteILOffsetTable;
     CallSiteILOffsetTable* genCallSite2ILOffsetMap;
-#endif // DEBUGGING_SUPPORT

     unsigned    genReturnLocal; // Local number for the return value when applicable.
     BasicBlock* genReturnBB;    // jumped to when not optimizing for speed.
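The new LPFLG_SIMD_LIMIT above takes the previously unused 0x0080 bit, and its comment says the value lives in lpConstLimit just as it does for LPFLG_CONST_LIMIT. So a consumer that only needs "the limit is a compile-time constant" could plausibly test both bits at once; the helper below is hypothetical, not code from this header:

    typedef unsigned short loopFlags; // illustrative width for the LPFLG_* masks

    const loopFlags LPFLG_SIMD_LIMIT  = 0x0080; // limit is Vector<T>.Count (in lpConstLimit)
    const loopFlags LPFLG_CONST_LIMIT = 0x0200; // limit is a literal constant (in lpConstLimit)

    // Hypothetical: either flag means lpConstLimit holds a usable constant bound.
    bool hasKnownConstantLimit(loopFlags lpFlags)
    {
        return (lpFlags & (LPFLG_CONST_LIMIT | LPFLG_SIMD_LIMIT)) != 0;
    }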
@@ -6588,8 +6653,14 @@ public:
     {
         return codeGen->doDoubleAlign();
     }
-    DWORD getCanDoubleAlign(); // Defined & used only by RegAlloc
-#endif // DOUBLE_ALIGN
+    DWORD getCanDoubleAlign();
+    bool shouldDoubleAlign(unsigned refCntStk,
+                           unsigned refCntReg,
+                           unsigned refCntWtdReg,
+                           unsigned refCntStkParam,
+                           unsigned refCntWtdStkDbl);
+#endif // DOUBLE_ALIGN
+
     __declspec(property(get = getFullPtrRegMap, put = setFullPtrRegMap)) bool genFullPtrRegMap;
     bool getFullPtrRegMap()
     {
@@ -6829,6 +6900,11 @@ private:
             return InstructionSet_AVX;
         }

+        if (CanUseSSE3_4())
+        {
+            return InstructionSet_SSE3_4;
+        }
+
         // min bar is SSE2
         assert(canUseSSE2());
         return InstructionSet_SSE2;
@@ -7072,7 +7148,7 @@ private:
     // and small int base type vectors.
     SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual(
         CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2);
-#endif // defined(_TARGET_AMD64_) && !defined(LEGACY_BACKEND)
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)

     void setLclRelatedToSIMDIntrinsic(GenTreePtr tree);
     bool areFieldsContiguous(GenTreePtr op1, GenTreePtr op2);
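The instruction-set hunk above slots SSE3_4 into a highest-first probe: AVX if allowed, then the SSE3/SSE4 group, then the SSE2 floor that the assert enforces. A condensed sketch of that selection ladder, with stand-in names since the real enum and query methods live elsewhere in the JIT:

    enum InstructionSet { ISA_SSE2, ISA_SSE3_4, ISA_AVX };

    // Stand-ins for the Compiler's canUseAVX()/CanUseSSE3_4()/canUseSSE2() queries.
    InstructionSet highestAvailable(bool avx, bool sse3_4)
    {
        if (avx)
            return ISA_AVX;      // widest vectors first
        if (sse3_4)
            return ISA_SSE3_4;   // the group CanUseSSE3_4() gates
        return ISA_SSE2;         // "min bar is SSE2" on this code path
    }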
@@ -7261,6 +7337,16 @@ private:

     // Returns true if the TYP_SIMD locals on stack are aligned at their
     // preferred byte boundary specified by getSIMDTypeAlignment().
+    //
+    // As per the Intel manual, the preferred alignment for AVX vectors is 32 bytes. On AMD64,
+    // RSP/EBP is aligned at 16 bytes; therefore, to align SIMD types at 32 bytes we would need
+    // RSP/EBP itself to be 32-byte aligned. It is not clear whether the additional stack space
+    // this requires is worth the benefit, so for now we use 16-byte alignment for AVX 256-bit
+    // vectors with unaligned loads/stores to/from memory. On x86, the stack frame is aligned
+    // to 4 bytes. We need to extend the existing support for double (8-byte) alignment to 16-
+    // or 32-byte alignment for frames with local SIMD vars, if that is determined to be
+    // profitable.
+    //
     bool isSIMDTypeLocalAligned(unsigned varNum)
     {
 #if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
@@ -7270,8 +7356,7 @@ private:
             int off = lvaFrameAddress(varNum, &ebpBased);
             // TODO-Cleanup: Can't this use the lvExactSize on the varDsc?
             int  alignment = getSIMDTypeAlignment(lvaTable[varNum].lvType);
-            bool isAligned = ((off % alignment) == 0);
-            noway_assert(isAligned || lvaTable[varNum].lvIsParam);
+            bool isAligned = (alignment <= STACK_ALIGN) && ((off % alignment) == 0);
             return isAligned;
         }
 #endif // FEATURE_SIMD
@@ -7289,6 +7374,16 @@ private:
 #endif
     }

+    // Whether SSE3, SSSE3, SSE4.1 and SSE4.2 are available
+    bool CanUseSSE3_4() const
+    {
+#ifdef _TARGET_XARCH_
+        return opts.compCanUseSSE3_4;
+#else
+        return false;
+#endif
+    }
+
     bool canUseAVX() const
     {
 #ifdef FEATURE_AVX_SUPPORT
@@ -7393,21 +7488,21 @@ public:

     struct Options
     {
-        CORJIT_FLAGS* jitFlags;  // all flags passed from the EE
-        unsigned      eeFlags;   // CorJitFlag flags passed from the EE
-        unsigned      compFlags; // method attributes
+        JitFlags* jitFlags;  // all flags passed from the EE
+        unsigned  compFlags; // method attributes

         codeOptimize compCodeOpt; // what type of code optimizations

         bool compUseFCOMI;
         bool compUseCMOV;
 #ifdef _TARGET_XARCH_
-        bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
+        bool compCanUseSSE2;   // Allow CodeGen to use "movq XMM" instructions
+        bool compCanUseSSE3_4; // Allow CodeGen to use SSE3, SSSE3, SSE4.1 and SSE4.2 instructions

 #ifdef FEATURE_AVX_SUPPORT
         bool compCanUseAVX; // Allow CodeGen to use AVX 256-bit vectors for SIMD operations
-#endif
-#endif
+#endif // FEATURE_AVX_SUPPORT
+#endif // _TARGET_XARCH_

         // optimize maximally and/or favor speed over size?
@@ -7464,7 +7559,7 @@ public:
 #ifdef FEATURE_READYTORUN_COMPILER
         inline bool IsReadyToRun()
         {
-            return (eeFlags & CORJIT_FLG_READYTORUN) != 0;
+            return jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN);
         }
 #else
         inline bool IsReadyToRun()
@@ -7478,7 +7573,7 @@ public:
         inline bool ShouldUsePInvokeHelpers()
         {
 #if COR_JIT_EE_VERSION > 460
-            return (jitFlags->corJitFlags2 & CORJIT_FLG2_USE_PINVOKE_HELPERS) != 0;
+            return jitFlags->IsSet(JitFlags::JIT_FLAG_USE_PINVOKE_HELPERS);
 #else
             return false;
 #endif
@@ -7489,7 +7584,7 @@ public:
         inline bool IsReversePInvoke()
         {
 #if COR_JIT_EE_VERSION > 460
-            return (jitFlags->corJitFlags2 & CORJIT_FLG2_REVERSE_PINVOKE) != 0;
+            return jitFlags->IsSet(JitFlags::JIT_FLAG_REVERSE_PINVOKE);
 #else
             return false;
 #endif
@@ -7499,7 +7594,7 @@ public:
         inline bool IsJit32Compat()
         {
 #if defined(_TARGET_X86_) && COR_JIT_EE_VERSION > 460
-            return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+            return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS);
 #else
             return false;
 #endif
@@ -7509,7 +7604,7 @@ public:
         inline bool IsJit64Compat()
         {
 #if defined(_TARGET_AMD64_) && COR_JIT_EE_VERSION > 460
-            return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+            return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS);
 #elif defined(_TARGET_AMD64_) && !defined(FEATURE_CORECLR)
             return true;
 #else
             return false;
 #endif
         }

-#ifdef DEBUGGING_SUPPORT
         bool compScopeInfo; // Generate the LocalVar info ?
         bool compDbgCode;   // Generate debugger-friendly code?
         bool compDbgInfo;   // Gather debugging info?
         bool compDbgEnC;
-#else
-        static const bool compDbgCode;
-#endif

 #ifdef PROFILING_SUPPORTED
         bool compNoPInvokeInlineCB;
@@ -7584,6 +7675,7 @@ public:
         bool altJit; // True if we are an altjit and are compiling this method

 #ifdef DEBUG
+        bool optRepeat;                // Repeat optimizer phases k times
         bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH
         bool dspCode;                  // Display native code generated
         bool dspEHTable;               // Display the EH table reported to the VM
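The Options hunks above replace the raw eeFlags/corJitFlags2 bit tests with jitFlags->IsSet(JitFlags::JIT_FLAG_...). Below is a guess at the general shape of such a typed flag wrapper — JitFlags itself is defined elsewhere in the JIT, and the names and values here are illustrative:

    class FlagSet
    {
        unsigned long long m_bits; // backing storage for all flag bits

    public:
        FlagSet() : m_bits(0) {}

        enum Flag // illustrative subset; each enumerator is a single-bit mask
        {
            FLAG_READYTORUN          = 1 << 0,
            FLAG_USE_PINVOKE_HELPERS = 1 << 1,
        };

        void Set(Flag f)         { m_bits |= (unsigned long long)f; }
        bool IsSet(Flag f) const { return (m_bits & (unsigned long long)f) != 0; }
    };

The gain over bare masks is that a flag can only be tested through the matching enum, so the old eeFlags-versus-corJitFlags2 split can no longer be mixed up silently.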
@@ -7623,9 +7715,11 @@ public:
         // for any call. We have a plan for not needing for stubs though
         bool compNeedStackProbes;

-        // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub())
-        // This options helps one to make JIT behave as if it is under profiler.
+#ifdef PROFILING_SUPPORTED
+        // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub()).
+        // This option helps make the JIT behave as if it is running under a profiler.
         bool compJitELTHookEnabled;
+#endif // PROFILING_SUPPORTED

 #if FEATURE_TAILCALL_OPT
         // Whether opportunistic or implicit tail call optimization is enabled.
@@ -7650,8 +7744,6 @@ public:

 #ifdef DEBUG

-    static bool s_dspMemStats; // Display per-phase memory statistics for every function
-
     template <typename T>
     T dspPtr(T p)
     {
@@ -7759,8 +7851,8 @@ public:
     codeOptimize compCodeOpt()
     {
 #if 0
-        // Switching between size & speed has measurable throughput impact 
-        // (3.5% on NGen mscorlib when measured). It used to be enabled for 
+        // Switching between size & speed has measurable throughput impact
+        // (3.5% on NGen mscorlib when measured). It used to be enabled for
         // DEBUG, but should generate identical code between CHK & RET builds,
         // so that's not acceptable.
         // TODO-Throughput: Figure out what to do about size vs. speed & throughput.
@@ -7772,10 +7864,6 @@ public:
 #endif
     }

-#ifdef DEBUG
-    CLRRandom* inlRNG;
-#endif
-
     //--------------------- Info about the procedure --------------------------

     struct Info
@@ -7855,8 +7943,6 @@ public:
     // and the VM expects that, or the JIT is a "self-host" compiler
     // (e.g., x86 hosted targeting x86) and the VM expects that.

-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
-
     /* The following holds IL scope information about local variables.
      */

@@ -7871,8 +7957,6 @@ public:
     unsigned                     compStmtOffsetsCount;
     ICorDebugInfo::BoundaryTypes compStmtOffsetsImplicit;

-#endif // DEBUGGING_SUPPORT || DEBUG
-
 #define CPU_X86 0x0100 // The generic X86 CPU
 #define CPU_X86_PENTIUM_4 0x0110
@@ -7937,9 +8021,12 @@ public:
         // Such method's compRetNativeType is TYP_STRUCT without a hidden RetBufArg
         return varTypeIsStruct(info.compRetNativeType) && (info.compRetBuffArg == BAD_VAR_NUM);
 #endif // TARGET_XXX
+
 #else // not FEATURE_MULTIREG_RET
+        // For this architecture there are no multireg returns
         return false;
+
 #endif // FEATURE_MULTIREG_RET
     }
@@ -7960,7 +8047,7 @@ public:

     void compDispLocalVars();

-#endif // DEBUGGING_SUPPORT || DEBUG
+#endif // DEBUG

     //-------------------------- Global Compiler Data ------------------------------------
@@ -8059,19 +8146,22 @@ public:
                     CORINFO_METHOD_INFO* methodInfo,
                     void**               methodCodePtr,
                     ULONG*               methodCodeSize,
-                    CORJIT_FLAGS*        compileFlags);
+                    JitFlags*            compileFlags);
     void compCompileFinish();
     int compCompileHelper(CORINFO_MODULE_HANDLE            classPtr,
                           COMP_HANDLE                      compHnd,
                           CORINFO_METHOD_INFO*             methodInfo,
                           void**                           methodCodePtr,
                           ULONG*                           methodCodeSize,
-                          CORJIT_FLAGS*                    compileFlags,
+                          JitFlags*                        compileFlags,
                           CorInfoInstantiationVerification instVerInfo);

     ArenaAllocator* compGetAllocator();

 #if MEASURE_MEM_ALLOC
+
+    static bool s_dspMemStats; // Display per-phase memory statistics for every function
+
     struct MemStats
     {
         unsigned allocCnt; // # of allocs
@@ -8195,9 +8285,8 @@ public:
     void compDspSrcLinesByLineNum(unsigned line, bool seek = false);
 #endif // DEBUG

-//-------------------------------------------------------------------------
+    //-------------------------------------------------------------------------

-#ifdef DEBUGGING_SUPPORT
     typedef ListNode<VarScopeDsc*> VarScopeListNode;

     struct VarScopeMapInfo
@@ -8255,8 +8344,6 @@ public:
     void compDispScopeLists();
 #endif // DEBUG

-#endif // DEBUGGING_SUPPORT
-
     bool compIsProfilerHookNeeded();

     //-------------------------------------------------------------------------
@@ -8299,7 +8386,7 @@ public:
 protected:
     size_t compMaxUncheckedOffsetForNullObject;

-    void compInitOptions(CORJIT_FLAGS* compileFlags);
+    void compInitOptions(JitFlags* compileFlags);
     void compSetProcessor();
     void compInitDebuggingInfo();
@@ -8307,16 +8394,22 @@ protected:
 #ifdef _TARGET_ARMARCH_
     bool compRsvdRegCheck(FrameLayoutState curState);
 #endif
-    void compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags);
+    void compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags* compileFlags);

-    // Data required for generating profiler Enter/Leave/TailCall hooks
-    CLANG_FORMAT_COMMENT_ANCHOR;
+    // Clear annotations produced during optimizations; to be used between iterations when repeating opts.
+    void ResetOptAnnotations();
+
+    // Regenerate loop descriptors; to be used between iterations when repeating opts.
+    void RecomputeLoopInfo();

 #ifdef PROFILING_SUPPORTED
+    // Data required for generating profiler Enter/Leave/TailCall hooks
+
     bool  compProfilerHookNeeded; // Whether profiler Enter/Leave/TailCall hook needs to be generated for the method
     void* compProfilerMethHnd;    // Profiler handle of the method being compiled. Passed as param to ELT callbacks
     bool  compProfilerMethHndIndirected; // Whether compProfilerHandle is pointer to the handle or is an actual handle
 #endif
+
 #ifdef _TARGET_AMD64_
     bool compQuirkForPPP(); // Check if this method should be Quirked for the PPP issue
 #endif
@@ -8692,6 +8785,18 @@ private:
 #endif
     inline void EndPhase(Phases phase); // Indicate the end of the given phase.

+#if MEASURE_CLRAPI_CALLS
+    // Thin wrappers that call into JitTimer (if present).
+    inline void CLRApiCallEnter(unsigned apix);
+    inline void CLRApiCallLeave(unsigned apix);
+
+public:
+    inline void CLR_API_Enter(API_ICorJitInfo_Names ename);
+    inline void CLR_API_Leave(API_ICorJitInfo_Names ename);
+
+private:
+#endif
+
 #if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
     // These variables are associated with maintaining SQM data about compile time.
     unsigned __int64 m_compCyclesAtEndOfInlining; // The thread-virtualized cycle count at the end of the inlining phase
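Finally, the MEASURE_CLRAPI_CALLS wrappers added at the end pair every ICorJitInfo entry with CLR_API_Enter/CLR_API_Leave so JitTimer can attribute cycles per API via the m_perClrAPIcycles/m_perClrAPIcalls counters introduced earlier. A deliberately simplified sketch of that enter/leave accounting — __rdtsc is used here purely as an example cycle source (an MSVC intrinsic), and unlike the real JitTimer this version ignores nested calls, which the m_CLRcallAPInum field exists to handle:

    #include <intrin.h> // __rdtsc; MSVC intrinsic, illustrative cycle source

    enum { kApiCount = 4 }; // stand-in for API_ICorJitInfo_Names::API_COUNT

    static unsigned long long s_perApiCycles[kApiCount]; // cf. m_perClrAPIcycles
    static unsigned           s_perApiCalls[kApiCount];  // cf. m_perClrAPIcalls
    static unsigned long long s_callStart;

    void clrApiCallEnter(unsigned apix)
    {
        s_perApiCalls[apix]++;   // count the invocation
        s_callStart = __rdtsc(); // stamp entry so Leave can compute elapsed cycles
    }

    void clrApiCallLeave(unsigned apix)
    {
        s_perApiCycles[apix] += __rdtsc() - s_callStart; // attribute cycles to this API
    }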