Diffstat (limited to 'src/jit/compiler.h'):
 src/jit/compiler.h | 297 ++++++++++++++++++++++++++++---------------
 1 file changed, 201 insertions(+), 96 deletions(-)
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 05047c5ecb..d8cd491063 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -691,12 +691,21 @@ public:
// is now TYP_INT in the local variable table. It's not really unused, because it's in the tree.
assert(varTypeIsStruct(lvType) || (lvType == TYP_BLK) || (lvPromoted && lvUnusedStruct));
+
+#if defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
+ // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do
+ // this for arguments, which must be passed according to the defined ABI.
+ if ((lvType == TYP_SIMD12) && !lvIsParam)
+ {
+ assert(lvExactSize == 12);
+ return 16;
+ }
+#endif // defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
+
return (unsigned)(roundUp(lvExactSize, TARGET_POINTER_SIZE));
}
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
unsigned lvSlotNum; // original slot # (if remapped)
-#endif
typeInfo lvVerTypeInfo; // type info needed for verification
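For context on the SIMD12 special case above: on a 32-bit target TARGET_POINTER_SIZE is 4, so the fallthrough rounding alone would not widen the local. A minimal illustration (not part of the diff):

    // Without the special case, a TYP_SIMD12 local would be sized as:
    unsigned naive = roundUp(12, 4);  // == 12: no room for a full 16-byte (XMM-sized) access
    // With the special case, a non-argument SIMD12 local reports 16 bytes instead, so codegen
    // can, for example, use full 16-byte vector loads and stores on it.
    unsigned widened = 16;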
@@ -926,6 +935,14 @@ extern const char* PhaseNames[];
extern const char* PhaseEnums[];
extern const LPCWSTR PhaseShortNames[];
+// The following enum provides a simple 1:1 mapping to the CLR APIs
+enum API_ICorJitInfo_Names
+{
+#define DEF_CLR_API(name) API_##name,
+#include "ICorJitInfo_API_names.h"
+ API_COUNT
+};
+
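The enum body above is generated with an X-macro: every DEF_CLR_API(name) entry in ICorJitInfo_API_names.h expands to an API_##name enumerator, and API_COUNT ends up equal to the number of listed APIs. A hedged sketch of the expansion (the two method names are purely illustrative, not the actual contents of that header):

    // If ICorJitInfo_API_names.h contained, say:
    //     DEF_CLR_API(getMethodClass)
    //     DEF_CLR_API(getMethodAttribs)
    // then the enum above would expand to:
    //     enum API_ICorJitInfo_Names { API_getMethodClass, API_getMethodAttribs, API_COUNT };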
//---------------------------------------------------------------
// Compilation time.
//
@@ -949,6 +966,10 @@ struct CompTimeInfo
unsigned __int64 m_totalCycles;
unsigned __int64 m_invokesByPhase[PHASE_NUMBER_OF];
unsigned __int64 m_cyclesByPhase[PHASE_NUMBER_OF];
+#if MEASURE_CLRAPI_CALLS
+ unsigned __int64 m_CLRinvokesByPhase[PHASE_NUMBER_OF];
+ unsigned __int64 m_CLRcyclesByPhase[PHASE_NUMBER_OF];
+#endif
// For better documentation, we call EndPhase on
// non-leaf phases. We should also call EndPhase on the
// last leaf subphase; obviously, the elapsed cycles between the EndPhase
@@ -960,12 +981,25 @@ struct CompTimeInfo
unsigned __int64 m_parentPhaseEndSlop;
bool m_timerFailure;
+#if MEASURE_CLRAPI_CALLS
+ // The following measures the time spent inside each individual CLR API call.
+ unsigned m_allClrAPIcalls;
+ unsigned m_perClrAPIcalls[API_ICorJitInfo_Names::API_COUNT];
+ unsigned __int64 m_allClrAPIcycles;
+ unsigned __int64 m_perClrAPIcycles[API_ICorJitInfo_Names::API_COUNT];
+ unsigned __int32 m_maxClrAPIcycles[API_ICorJitInfo_Names::API_COUNT];
+#endif // MEASURE_CLRAPI_CALLS
+
CompTimeInfo(unsigned byteCodeBytes);
#endif
};
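To make the role of the new per-API fields concrete, here is a minimal sketch of folding one timed CLR API call into a CompTimeInfo (the helper itself is hypothetical; only the field names come from the declarations above):

#if MEASURE_CLRAPI_CALLS
// Hypothetical helper, not part of the diff: record one CLR API call that took `cycles` cycles.
static void RecordClrApiCall(CompTimeInfo& info, unsigned apix, unsigned __int64 cycles)
{
    info.m_allClrAPIcalls++;
    info.m_perClrAPIcalls[apix]++;
    info.m_allClrAPIcycles += cycles;
    info.m_perClrAPIcycles[apix] += cycles;
    if (cycles > info.m_maxClrAPIcycles[apix])
    {
        info.m_maxClrAPIcycles[apix] = (unsigned __int32)cycles; // note: the per-API max is stored as 32 bits
    }
}
#endif // MEASURE_CLRAPI_CALLS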
#ifdef FEATURE_JIT_METHOD_PERF
+#if MEASURE_CLRAPI_CALLS
+struct WrapICorJitInfo;
+#endif
+
// This class summarizes the JIT time information over the course of a run: the number of methods compiled,
// and the total and maximum timings. (These are instances of the "CompTimeInfo" type described above).
// The operation of adding a single method's timing to the summary may be performed concurrently by several
@@ -977,6 +1011,7 @@ class CompTimeSummaryInfo
static CritSecObject s_compTimeSummaryLock;
int m_numMethods;
+ int m_totMethods;
CompTimeInfo m_total;
CompTimeInfo m_maximum;
@@ -996,13 +1031,14 @@ public:
// This is the unique CompTimeSummaryInfo object for this instance of the runtime.
static CompTimeSummaryInfo s_compTimeSummary;
- CompTimeSummaryInfo() : m_numMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0)
+ CompTimeSummaryInfo()
+ : m_numMethods(0), m_totMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0)
{
}
// Assumes that "info" is a completed CompTimeInfo for a compilation; adds it to the summary.
// This is thread safe.
- void AddInfo(CompTimeInfo& info);
+ void AddInfo(CompTimeInfo& info, bool includePhases);
// Print the summary information to "f".
// This is not thread-safe; assumed to be called by only one thread.
@@ -1017,6 +1053,13 @@ class JitTimer
{
unsigned __int64 m_start; // Start of the compilation.
unsigned __int64 m_curPhaseStart; // Start of the current phase.
+#if MEASURE_CLRAPI_CALLS
+ unsigned __int64 m_CLRcallStart; // Start of the current CLR API call (if any).
+ unsigned __int64 m_CLRcallInvokes; // CLR API invokes under the current outer call so far.
+ unsigned __int64 m_CLRcallCycles; // CLR API cycles spent under the current outer call so far.
+ int m_CLRcallAPInum; // The enum/index of the current CLR API call (or -1).
+ static double s_cyclesPerSec; // Cached for speedier measurements
+#endif
#ifdef DEBUG
Phases m_lastPhase; // The last phase that was completed (or (Phases)-1 to start).
#endif
@@ -1045,9 +1088,15 @@ public:
// Ends the current phase (argument is for a redundant check).
void EndPhase(Phases phase);
+#if MEASURE_CLRAPI_CALLS
+ // Start and end a timed CLR API call.
+ void CLRApiCallEnter(unsigned apix);
+ void CLRApiCallLeave(unsigned apix);
+#endif // MEASURE_CLRAPI_CALLS
+
// Completes the timing of the current method, which is assumed to have "byteCodeBytes" bytes of bytecode,
// and adds it to "sum".
- void Terminate(Compiler* comp, CompTimeSummaryInfo& sum);
+ void Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases);
// Attempts to query the cycle counter of the current thread. If successful, returns "true" and sets
// *cycles to the cycle counter value. Otherwise, returns false and sets the "m_timerFailure" flag of
@@ -1164,7 +1213,13 @@ struct fgArgTabEntry
regNumber otherRegNum; // The (second) register to use when passing this argument.
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#elif defined(_TARGET_X86_)
+ __declspec(property(get = getIsStruct)) bool isStruct;
+ bool getIsStruct()
+ {
+ return varTypeIsStruct(node);
+ }
+#endif // _TARGET_X86_
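On x86 the new isStruct member is an MSVC __declspec(property): reading entry->isStruct compiles into a call to getIsStruct(), which keeps call sites source-compatible with the FEATURE_UNIX_AMD64_STRUCT_PASSING path, where isStruct is a plain field. A hedged usage sketch (the variables are illustrative):

    // Illustrative only: the same source works whether isStruct is a real field or the x86 property.
    fgArgTabEntryPtr entry = gtArgEntryByArgNum(call, argNum);
    if (entry->isStruct) // on x86 this lowers to entry->getIsStruct(), i.e. varTypeIsStruct(entry->node)
    {
        // struct-specific argument handling
    }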
#ifdef _TARGET_ARM_
void SetIsHfaRegArg(bool hfaRegArg)
@@ -1293,6 +1348,10 @@ public:
{
return hasStackArgs;
}
+ bool AreArgsComplete() const
+ {
+ return argsComplete;
+ }
};
#ifdef DEBUG
@@ -1939,8 +1998,6 @@ public:
GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2);
GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3);
- GenTreeArgList* gtNewAggregate(GenTree* element);
-
static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreePtr call, unsigned argNum);
static fgArgTabEntryPtr gtArgEntryByNode(GenTreePtr call, GenTreePtr node);
fgArgTabEntryPtr gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx);
@@ -1975,7 +2032,18 @@ public:
GenTreePtr gtClone(GenTree* tree, bool complexOK = false);
- GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0);
+ // If `tree` is a lclVar with lclNum `varNum`, return an IntCns with value `varVal`; otherwise,
+ // create a copy of `tree`, adding specified flags, replacing uses of lclVar `deepVarNum` with
+ // IntCnses with value `deepVarVal`.
+ GenTreePtr gtCloneExpr(
+ GenTree* tree, unsigned addFlags, unsigned varNum, int varVal, unsigned deepVarNum, int deepVarVal);
+
+ // Create a copy of `tree`, optionally adding specified flags, and optionally mapping uses of local
+ // `varNum` to int constants with value `varVal`.
+ GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0)
+ {
+ return gtCloneExpr(tree, addFlags, varNum, varVal, varNum, varVal);
+ }
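The four-argument overload above simply forwards varNum/varVal as the deep pair, so existing call sites keep working. A hedged usage sketch (the tree and local number are illustrative):

    // Illustrative only: clone `expr`, substituting the constant 1 for each use of local `lclNum`.
    GenTreePtr clone = gtCloneExpr(expr, 0 /* addFlags */, lclNum, 1);
    // ...which is equivalent to the explicit six-argument form:
    GenTreePtr clone2 = gtCloneExpr(expr, 0, lclNum, 1, lclNum, 1);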
GenTreePtr gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr replacementTree);
@@ -1997,7 +2065,7 @@ public:
unsigned gtHashValue(GenTree* tree);
- unsigned gtSetListOrder(GenTree* list, bool regs);
+ unsigned gtSetListOrder(GenTree* list, bool regs, bool isListCallArgs);
void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* adr, bool constOnly);
@@ -2277,7 +2345,8 @@ public:
DNER_VMNeedsStackAddr,
DNER_LiveInOutOfHandler,
DNER_LiveAcrossUnmanagedCall,
- DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address.
+ DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address.
+ DNER_IsStructArg, // Is a struct passed as an argument in a way that requires a stack location.
#ifdef JIT32_GCENCODER
DNER_PinningRef,
#endif
@@ -2439,7 +2508,6 @@ public:
void lvaInit();
- unsigned lvaArgSize(const void* argTok);
unsigned lvaLclSize(unsigned varNum);
unsigned lvaLclExactSize(unsigned varNum);
@@ -2712,9 +2780,10 @@ protected:
void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);
- bool impCanPInvokeInline(var_types callRetTyp);
- bool impCanPInvokeInlineCallSite(var_types callRetTyp);
- void impCheckForPInvokeCall(GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags);
+ bool impCanPInvokeInline(BasicBlock* block);
+ bool impCanPInvokeInlineCallSite(BasicBlock* block);
+ void impCheckForPInvokeCall(
+ GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block);
GenTreePtr impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset = BAD_IL_OFFSET);
void impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig);
@@ -2739,8 +2808,6 @@ protected:
GenTreePtr impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd);
- GenTreePtr impInitCallLongReturn(GenTreePtr call);
-
GenTreePtr impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd);
#ifdef DEBUG
@@ -2764,7 +2831,6 @@ protected:
void impImportLeave(BasicBlock* block);
void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr);
- BOOL impLocAllocOnStack();
GenTreePtr impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
@@ -2868,6 +2934,8 @@ public:
unsigned flags,
void* compileTimeHandle);
+ GenTreePtr getRuntimeContextTree(CORINFO_RUNTIME_LOOKUP_KIND kind);
+
GenTreePtr impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
CORINFO_LOOKUP* pLookup,
void* compileTimeHandle);
@@ -3148,8 +3216,6 @@ private:
static LONG jitNestingLevel;
#endif // DEBUG
- bool seenConditionalJump;
-
static BOOL impIsAddressInLocal(GenTreePtr tree, GenTreePtr* lclVarTreeOut);
void impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult);
@@ -3455,8 +3521,9 @@ public:
void fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw);
void fgMorphBlocks();
- bool fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg));
+ bool fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg));
+ void fgCheckArgCnt();
void fgSetOptions();
#ifdef DEBUG
@@ -3845,7 +3912,7 @@ public:
//
var_types getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
structPassingKind* wbPassStruct = nullptr,
- unsigned structSize = 0);
+ unsigned structSize = 0);
#ifdef DEBUG
// Print a representation of "vnp" or "vn" on standard output.
@@ -4072,7 +4139,7 @@ public:
void fgUnreachableBlock(BasicBlock* block);
- void fgRemoveJTrue(BasicBlock* block);
+ void fgRemoveConditionalJump(BasicBlock* block);
BasicBlock* fgLastBBInMainFunction();
@@ -4204,6 +4271,7 @@ public:
void fgDebugCheckLinks(bool morphTrees = false);
void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt);
void fgDebugCheckFlags(GenTreePtr tree);
+ void fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags);
#endif
#ifdef LEGACY_BACKEND
@@ -4305,7 +4373,7 @@ protected:
void fgLinkBasicBlocks();
- void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
+ unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
void fgCheckBasicBlockControlFlow();
@@ -4380,13 +4448,6 @@ private:
GenTree* fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr);
GenTree* fgMakeMultiUse(GenTree** ppTree);
- // After replacing oldChild with newChild, fixup the fgArgTabEntryPtr
- // if it happens to be an argument to a call.
- void fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild);
-
-public:
- void fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg);
-
private:
// Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node.
GenTreePtr fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree);
@@ -4440,16 +4501,11 @@ private:
// for sufficiently small offsets, we can rely on OS page protection to implicitly null-check addresses that we
// know will be dereferenced. To know that reliance on implicit null checking is sound, we must further know that
// all offsets between the top-level indirection and the bottom are constant, and that their sum is sufficiently
- // small; hence the other fields of MorphAddrContext. Finally, the odd structure of GT_COPYBLK, in which the second
- // argument is a GT_LIST, requires us to "tell" that List node that its parent is a GT_COPYBLK, so it "knows" that
- // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes
- // representing method call argument lists.)
+ // small; hence the other fields of MorphAddrContext.
enum MorphAddrContextKind
{
MACK_Ind,
MACK_Addr,
- MACK_CopyBlock, // This is necessary so we know we have to start a new "Ind" context for each of the
- // addresses in the arg list.
};
struct MorphAddrContext
{
@@ -4513,7 +4569,7 @@ private:
void fgMorphCallInline(GenTreeCall* call, InlineResult* result);
void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result);
#if DEBUG
- void fgNoteNonInlineCandidate(GenTreePtr tree, GenTreeCall* call);
+ void fgNoteNonInlineCandidate(GenTreeStmt* stmt, GenTreeCall* call);
static fgWalkPreFn fgFindNonInlineCandidate;
#endif
GenTreePtr fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd);
@@ -4525,16 +4581,14 @@ private:
GenTreePtr fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue = false);
GenTreePtr fgMorphBlkNode(GenTreePtr tree, bool isDest);
GenTreePtr fgMorphBlockOperand(GenTreePtr tree, var_types asgType, unsigned blockWidth, bool isDest);
+ void fgMorphUnsafeBlk(GenTreeObj* obj);
GenTreePtr fgMorphCopyBlock(GenTreePtr tree);
GenTreePtr fgMorphForRegisterFP(GenTreePtr tree);
GenTreePtr fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac = nullptr);
GenTreePtr fgMorphSmpOpPre(GenTreePtr tree);
- GenTreePtr fgMorphDivByConst(GenTreeOp* tree);
- GenTreePtr fgMorphModByConst(GenTreeOp* tree);
GenTreePtr fgMorphModToSubMulDiv(GenTreeOp* tree);
GenTreePtr fgMorphSmpOpOptional(GenTreeOp* tree);
GenTreePtr fgMorphRecognizeBoxNullable(GenTree* compare);
- bool fgShouldUseMagicNumberDivide(GenTreeOp* tree);
GenTreePtr fgMorphToEmulatedFP(GenTreePtr tree);
GenTreePtr fgMorphConst(GenTreePtr tree);
@@ -4544,11 +4598,12 @@ public:
private:
#if LOCAL_ASSERTION_PROP
+ void fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree));
void fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree));
#endif
void fgMorphTreeDone(GenTreePtr tree, GenTreePtr oldTree = nullptr DEBUGARG(int morphNum = 0));
- GenTreePtr fgMorphStmt;
+ GenTreeStmt* fgMorphStmt;
unsigned fgGetBigOffsetMorphingTemp(var_types type); // We cache one temp per type to be
// used when morphing big offset.
@@ -4564,7 +4619,6 @@ private:
void fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar = nullptr);
-#ifdef DEBUGGING_SUPPORT
void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
void fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
@@ -4578,8 +4632,6 @@ private:
void fgDispDebugScopes();
#endif // DEBUG
-#endif // DEBUGGING_SUPPORT
-
//-------------------------------------------------------------------------
//
// The following keeps track of any code we've added for things like array
@@ -4622,6 +4674,7 @@ private:
void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result);
void fgInsertInlineeBlocks(InlineInfo* pInlineInfo);
GenTreePtr fgInlinePrependStatements(InlineInfo* inlineInfo);
+ void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, GenTreePtr stmt);
#if FEATURE_MULTIREG_RET
GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree);
@@ -4905,6 +4958,7 @@ public:
#define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit)
#define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit)
#define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit)
+#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with Vector<T>.Count (found in lpConstLimit)
#define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop
#define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away)
@@ -5205,6 +5259,11 @@ protected:
static const int MIN_CSE_COST = 2;
+ // Keeps track of the CSE indices (bit-vector traits plus canonical full and empty sets)
+ BitVecTraits* cseTraits;
+ EXPSET_TP cseFull;
+ EXPSET_TP cseEmpty;
+
/* Generic list of nodes - used by the CSE logic */
struct treeLst
@@ -6237,7 +6296,7 @@ public:
BOOL eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd);
-#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(TRACK_LSRA_STATS)
bool IsSuperPMIException(unsigned code)
{
@@ -6334,10 +6393,19 @@ public:
#endif
}
+ inline bool IsTargetAbi(CORINFO_RUNTIME_ABI abi)
+ {
+#if COR_JIT_EE_VERSION > 460
+ return eeGetEEInfo()->targetAbi == abi;
+#else
+ return CORINFO_DESKTOP_ABI == abi;
+#endif
+ }
+
inline bool generateCFIUnwindCodes()
{
-#if COR_JIT_EE_VERSION > 460 && defined(UNIX_AMD64_ABI)
- return eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI;
+#ifdef UNIX_AMD64_ABI
+ return IsTargetAbi(CORINFO_CORERT_ABI);
#else
return false;
#endif
@@ -6522,8 +6590,6 @@ private:
public:
CodeGenInterface* codeGen;
-#ifdef DEBUGGING_SUPPORT
-
// The following holds information about instr offsets in terms of generated code.
struct IPmappingDsc
@@ -6553,7 +6619,6 @@ public:
typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, IL_OFFSETX, JitSimplerHashBehavior>
CallSiteILOffsetTable;
CallSiteILOffsetTable* genCallSite2ILOffsetMap;
-#endif // DEBUGGING_SUPPORT
unsigned genReturnLocal; // Local number for the return value when applicable.
BasicBlock* genReturnBB; // jumped to when not optimizing for speed.
@@ -6588,8 +6653,14 @@ public:
{
return codeGen->doDoubleAlign();
}
- DWORD getCanDoubleAlign(); // Defined & used only by RegAlloc
-#endif // DOUBLE_ALIGN
+ DWORD getCanDoubleAlign();
+ bool shouldDoubleAlign(unsigned refCntStk,
+ unsigned refCntReg,
+ unsigned refCntWtdReg,
+ unsigned refCntStkParam,
+ unsigned refCntWtdStkDbl);
+#endif // DOUBLE_ALIGN
+
__declspec(property(get = getFullPtrRegMap, put = setFullPtrRegMap)) bool genFullPtrRegMap;
bool getFullPtrRegMap()
{
@@ -6829,6 +6900,11 @@ private:
return InstructionSet_AVX;
}
+ if (CanUseSSE3_4())
+ {
+ return InstructionSet_SSE3_4;
+ }
+
// min bar is SSE2
assert(canUseSSE2());
return InstructionSet_SSE2;
@@ -7072,7 +7148,7 @@ private:
// and small int base type vectors.
SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual(
CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2);
-#endif // defined(_TARGET_AMD64_) && !defined(LEGACY_BACKEND)
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
void setLclRelatedToSIMDIntrinsic(GenTreePtr tree);
bool areFieldsContiguous(GenTreePtr op1, GenTreePtr op2);
@@ -7261,6 +7337,16 @@ private:
// Returns true if the TYP_SIMD locals on stack are aligned at their
// preferred byte boundary specified by getSIMDTypeAlignment().
+ //
+ // As per the Intel manual, the preferred alignment for AVX vectors is 32 bytes. On Amd64,
+ // RSP/EBP is aligned at 16 bytes, so to align SIMD types at 32 bytes we would also need
+ // RSP/EBP to be 32-byte aligned. It is not clear whether the additional stack space used in
+ // aligning the stack is worth the benefit, so for now we use 16-byte alignment for AVX
+ // 256-bit vectors, with unaligned loads/stores to/from memory. On x86, the stack frame
+ // is aligned to 4 bytes. We would need to extend the existing support for double (8-byte)
+ // alignment to 16- or 32-byte alignment for frames with local SIMD vars, if that is
+ // determined to be profitable.
+ //
bool isSIMDTypeLocalAligned(unsigned varNum)
{
#if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
@@ -7270,8 +7356,7 @@ private:
int off = lvaFrameAddress(varNum, &ebpBased);
// TODO-Cleanup: Can't this use the lvExactSize on the varDsc?
int alignment = getSIMDTypeAlignment(lvaTable[varNum].lvType);
- bool isAligned = ((off % alignment) == 0);
- noway_assert(isAligned || lvaTable[varNum].lvIsParam);
+ bool isAligned = (alignment <= STACK_ALIGN) && ((off % alignment) == 0);
return isAligned;
}
#endif // FEATURE_SIMD
@@ -7289,6 +7374,16 @@ private:
#endif
}
+ // Whether SSE3, SSSE3, SSE4.1 and SSE4.2 are available
+ bool CanUseSSE3_4() const
+ {
+#ifdef _TARGET_XARCH_
+ return opts.compCanUseSSE3_4;
+#else
+ return false;
+#endif
+ }
+
bool canUseAVX() const
{
#ifdef FEATURE_AVX_SUPPORT
@@ -7393,21 +7488,21 @@ public:
struct Options
{
- CORJIT_FLAGS* jitFlags; // all flags passed from the EE
- unsigned eeFlags; // CorJitFlag flags passed from the EE
- unsigned compFlags; // method attributes
+ JitFlags* jitFlags; // all flags passed from the EE
+ unsigned compFlags; // method attributes
codeOptimize compCodeOpt; // what type of code optimizations
bool compUseFCOMI;
bool compUseCMOV;
#ifdef _TARGET_XARCH_
- bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
+ bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
+ bool compCanUseSSE3_4; // Allow CodeGen to use SSE3, SSSE3, SSE4.1 and SSE4.2 instructions
#ifdef FEATURE_AVX_SUPPORT
bool compCanUseAVX; // Allow CodeGen to use AVX 256-bit vectors for SIMD operations
-#endif
-#endif
+#endif // FEATURE_AVX_SUPPORT
+#endif // _TARGET_XARCH_
// optimize maximally and/or favor speed over size?
@@ -7464,7 +7559,7 @@ public:
#ifdef FEATURE_READYTORUN_COMPILER
inline bool IsReadyToRun()
{
- return (eeFlags & CORJIT_FLG_READYTORUN) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN);
}
#else
inline bool IsReadyToRun()
@@ -7478,7 +7573,7 @@ public:
inline bool ShouldUsePInvokeHelpers()
{
#if COR_JIT_EE_VERSION > 460
- return (jitFlags->corJitFlags2 & CORJIT_FLG2_USE_PINVOKE_HELPERS) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_USE_PINVOKE_HELPERS);
#else
return false;
#endif
@@ -7489,7 +7584,7 @@ public:
inline bool IsReversePInvoke()
{
#if COR_JIT_EE_VERSION > 460
- return (jitFlags->corJitFlags2 & CORJIT_FLG2_REVERSE_PINVOKE) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_REVERSE_PINVOKE);
#else
return false;
#endif
@@ -7499,7 +7594,7 @@ public:
inline bool IsJit32Compat()
{
#if defined(_TARGET_X86_) && COR_JIT_EE_VERSION > 460
- return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS);
#else
return false;
#endif
@@ -7509,7 +7604,7 @@ public:
inline bool IsJit64Compat()
{
#if defined(_TARGET_AMD64_) && COR_JIT_EE_VERSION > 460
- return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS);
#elif defined(_TARGET_AMD64_) && !defined(FEATURE_CORECLR)
return true;
#else
@@ -7517,14 +7612,10 @@ public:
#endif
}
-#ifdef DEBUGGING_SUPPORT
bool compScopeInfo; // Generate the LocalVar info ?
bool compDbgCode; // Generate debugger-friendly code?
bool compDbgInfo; // Gather debugging info?
bool compDbgEnC;
-#else
- static const bool compDbgCode;
-#endif
#ifdef PROFILING_SUPPORTED
bool compNoPInvokeInlineCB;
@@ -7584,6 +7675,7 @@ public:
bool altJit; // True if we are an altjit and are compiling this method
#ifdef DEBUG
+ bool optRepeat; // Repeat optimizer phases k times
bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH
bool dspCode; // Display native code generated
bool dspEHTable; // Display the EH table reported to the VM
@@ -7623,9 +7715,11 @@ public:
// for any call. We have a plan for not needing for stubs though
bool compNeedStackProbes;
- // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub())
- // This options helps one to make JIT behave as if it is under profiler.
+#ifdef PROFILING_SUPPORTED
+ // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub()).
+ // This option helps make the JIT behave as if it is running under a profiler.
bool compJitELTHookEnabled;
+#endif // PROFILING_SUPPORTED
#if FEATURE_TAILCALL_OPT
// Whether opportunistic or implicit tail call optimization is enabled.
@@ -7650,8 +7744,6 @@ public:
#ifdef DEBUG
- static bool s_dspMemStats; // Display per-phase memory statistics for every function
-
template <typename T>
T dspPtr(T p)
{
@@ -7759,8 +7851,8 @@ public:
codeOptimize compCodeOpt()
{
#if 0
- // Switching between size & speed has measurable throughput impact
- // (3.5% on NGen mscorlib when measured). It used to be enabled for
+ // Switching between size & speed has measurable throughput impact
+ // (3.5% on NGen mscorlib when measured). It used to be enabled for
// DEBUG, but should generate identical code between CHK & RET builds,
// so that's not acceptable.
// TODO-Throughput: Figure out what to do about size vs. speed & throughput.
@@ -7772,10 +7864,6 @@ public:
#endif
}
-#ifdef DEBUG
- CLRRandom* inlRNG;
-#endif
-
//--------------------- Info about the procedure --------------------------
struct Info
@@ -7855,8 +7943,6 @@ public:
// and the VM expects that, or the JIT is a "self-host" compiler
// (e.g., x86 hosted targeting x86) and the VM expects that.
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
-
/* The following holds IL scope information about local variables.
*/
@@ -7871,8 +7957,6 @@ public:
unsigned compStmtOffsetsCount;
ICorDebugInfo::BoundaryTypes compStmtOffsetsImplicit;
-#endif // DEBUGGING_SUPPORT || DEBUG
-
#define CPU_X86 0x0100 // The generic X86 CPU
#define CPU_X86_PENTIUM_4 0x0110
@@ -7937,9 +8021,12 @@ public:
// Such method's compRetNativeType is TYP_STRUCT without a hidden RetBufArg
return varTypeIsStruct(info.compRetNativeType) && (info.compRetBuffArg == BAD_VAR_NUM);
#endif // TARGET_XXX
+
#else // not FEATURE_MULTIREG_RET
+
// For this architecture there are no multireg returns
return false;
+
#endif // FEATURE_MULTIREG_RET
}
@@ -7960,7 +8047,7 @@ public:
void compDispLocalVars();
-#endif // DEBUGGING_SUPPORT || DEBUG
+#endif // DEBUG
//-------------------------- Global Compiler Data ------------------------------------
@@ -8059,19 +8146,22 @@ public:
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags);
+ JitFlags* compileFlags);
void compCompileFinish();
int compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
COMP_HANDLE compHnd,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags,
+ JitFlags* compileFlags,
CorInfoInstantiationVerification instVerInfo);
ArenaAllocator* compGetAllocator();
#if MEASURE_MEM_ALLOC
+
+ static bool s_dspMemStats; // Display per-phase memory statistics for every function
+
struct MemStats
{
unsigned allocCnt; // # of allocs
@@ -8195,9 +8285,8 @@ public:
void compDspSrcLinesByLineNum(unsigned line, bool seek = false);
#endif // DEBUG
-//-------------------------------------------------------------------------
+ //-------------------------------------------------------------------------
-#ifdef DEBUGGING_SUPPORT
typedef ListNode<VarScopeDsc*> VarScopeListNode;
struct VarScopeMapInfo
@@ -8255,8 +8344,6 @@ public:
void compDispScopeLists();
#endif // DEBUG
-#endif // DEBUGGING_SUPPORT
-
bool compIsProfilerHookNeeded();
//-------------------------------------------------------------------------
@@ -8299,7 +8386,7 @@ public:
protected:
size_t compMaxUncheckedOffsetForNullObject;
- void compInitOptions(CORJIT_FLAGS* compileFlags);
+ void compInitOptions(JitFlags* compileFlags);
void compSetProcessor();
void compInitDebuggingInfo();
@@ -8307,16 +8394,22 @@ protected:
#ifdef _TARGET_ARMARCH_
bool compRsvdRegCheck(FrameLayoutState curState);
#endif
- void compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags);
+ void compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags* compileFlags);
- // Data required for generating profiler Enter/Leave/TailCall hooks
- CLANG_FORMAT_COMMENT_ANCHOR;
+ // Clear annotations produced during optimizations; to be used between iterations when repeating opts.
+ void ResetOptAnnotations();
+
+ // Regenerate loop descriptors; to be used between iterations when repeating opts.
+ void RecomputeLoopInfo();
#ifdef PROFILING_SUPPORTED
+ // Data required for generating profiler Enter/Leave/TailCall hooks
+
bool compProfilerHookNeeded; // Whether profiler Enter/Leave/TailCall hook needs to be generated for the method
void* compProfilerMethHnd; // Profiler handle of the method being compiled. Passed as param to ELT callbacks
bool compProfilerMethHndIndirected; // Whether compProfilerHandle is pointer to the handle or is an actual handle
#endif
+
#ifdef _TARGET_AMD64_
bool compQuirkForPPP(); // Check if this method should be Quirked for the PPP issue
#endif
@@ -8692,6 +8785,18 @@ private:
#endif
inline void EndPhase(Phases phase); // Indicate the end of the given phase.
+#if MEASURE_CLRAPI_CALLS
+ // Thin wrappers that call into JitTimer (if present).
+ inline void CLRApiCallEnter(unsigned apix);
+ inline void CLRApiCallLeave(unsigned apix);
+
+public:
+ inline void CLR_API_Enter(API_ICorJitInfo_Names ename);
+ inline void CLR_API_Leave(API_ICorJitInfo_Names ename);
+
+private:
+#endif
+
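These hooks are intended to be driven by the WrapICorJitInfo shim forward-declared earlier; a hedged sketch of the wrapping pattern (the member names and the choice of getMethodClass are illustrative):

#if MEASURE_CLRAPI_CALLS
// Illustrative sketch only: each wrapped JIT-EE method brackets the real call with the timer hooks.
CORINFO_CLASS_HANDLE WrapICorJitInfo::getMethodClass(CORINFO_METHOD_HANDLE method)
{
    pCompiler->CLR_API_Enter(API_getMethodClass);
    CORINFO_CLASS_HANDLE result = wrappedInfo->getMethodClass(method);
    pCompiler->CLR_API_Leave(API_getMethodClass);
    return result;
}
#endif // MEASURE_CLRAPI_CALLS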
#if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
// These variables are associated with maintaining SQM data about compile time.
unsigned __int64 m_compCyclesAtEndOfInlining; // The thread-virtualized cycle count at the end of the inlining phase