From 00f5934a3e34977c7a1502da604f2dae90040888 Mon Sep 17 00:00:00 2001 From: Eugene Rozenfeld Date: Mon, 29 Oct 2018 17:34:17 -0700 Subject: Implement escape analysis and stack allocation of non-box objects without gc fields. This change implements a conservative flow-insensitive escape analysis and stack allocation of non-box objects without gc fields. Handling of objects with gc fields, box objects, and fixed-size arrays is future work. Escape analysis is based on the one described here: https://www.cc.gatech.edu/~harrold/6340/cs6340_fall2009/Readings/choi99escape.pdf Main limitations of this version of the escape analysis: 1. The analysis is flow-insensitive. 2. The analysis is intra-procedural and only sees the current method and the inlined methods. 3. The analysis assumes that references passed to non-pure-helper calls escape. 4. The analysis assumes that any references assigned to fields of objects escape. Some of these limitations will be removed in future work. I started with prior prototypes from @echesakovMSFT and @AndyAyersMS and extended and refactored parts of them. I also added tests for cases that are currently handled or will be handled soon. 
--- src/jit/compiler.h | 13 +- src/jit/compmemkind.h | 1 + src/jit/gcencode.cpp | 29 ++ src/jit/gentree.cpp | 9 +- src/jit/jitgcinfo.h | 3 + src/jit/lclvars.cpp | 2 +- src/jit/morph.cpp | 4 + src/jit/objectalloc.cpp | 529 +++++++++++++++++++-- src/jit/objectalloc.h | 105 +++- tests/issues.targets | 7 + .../ObjectStackAllocationTests.cs | 232 +++++++++ .../ObjectStackAllocationTests.csproj | 54 +++ 12 files changed, 936 insertions(+), 52 deletions(-) create mode 100644 tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs create mode 100644 tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.csproj diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 9be0fdd2d8..d7ba9709c5 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -6115,12 +6115,13 @@ public: } }; -#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array -#define OMF_HAS_NEWOBJ 0x00000002 // Method contains 'new' of an object type. -#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores. -#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference. -#define OMF_HAS_NULLCHECK 0x00000010 // Method contains null check. -#define OMF_HAS_FATPOINTER 0x00000020 // Method contains call, that needs fat pointer transformation. +#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array +#define OMF_HAS_NEWOBJ 0x00000002 // Method contains 'new' of an object type. +#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores. +#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference. +#define OMF_HAS_NULLCHECK 0x00000010 // Method contains null check. +#define OMF_HAS_FATPOINTER 0x00000020 // Method contains call, that needs fat pointer transformation. +#define OMF_HAS_OBJSTACKALLOC 0x00000040 // Method contains an object allocated on the stack. 
bool doesMethodHaveFatPointer() { diff --git a/src/jit/compmemkind.h b/src/jit/compmemkind.h index ff00accc2b..582a2a9e3a 100644 --- a/src/jit/compmemkind.h +++ b/src/jit/compmemkind.h @@ -53,6 +53,7 @@ CompMemKindMacro(Unknown) CompMemKindMacro(RangeCheck) CompMemKindMacro(CopyProp) CompMemKindMacro(SideEffects) +CompMemKindMacro(ObjectAllocator) //clang-format on #undef CompMemKindMacro diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp index d742822511..af37304d48 100644 --- a/src/jit/gcencode.cpp +++ b/src/jit/gcencode.cpp @@ -4245,6 +4245,7 @@ void GCInfo::gcMakeRegPtrTable( // Or in byref_OFFSET_FLAG for 'byref' pointer tracking flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); } + gcUpdateFlagForStackAllocatedObjects(flags); if (varDsc->lvPinned) { @@ -4344,6 +4345,7 @@ void GCInfo::gcMakeRegPtrTable( { flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); } + gcUpdateFlagForStackAllocatedObjects(flags); GcStackSlotBase stackSlotBase = GC_SP_REL; if (compiler->isFramePointerUsed()) @@ -4728,6 +4730,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, { regFlags = (GcSlotFlags)(regFlags | GC_SLOT_INTERIOR); } + gcUpdateFlagForStackAllocatedObjects(regFlags); RegSlotIdKey rskey(regNum, regFlags); GcSlotId regSlotId; @@ -4818,6 +4821,8 @@ void GCInfo::gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode { flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); } + gcUpdateFlagForStackAllocatedObjects(flags); + if ((lowBits & pinned_OFFSET_FLAG) != 0) { flags = (GcSlotFlags)(flags | GC_SLOT_PINNED); @@ -4920,6 +4925,30 @@ void GCInfo::gcInfoRecordGCStackArgsDead(GcInfoEncoder* gcInfoEncoder, } } +//------------------------------------------------------------------------ +// gcUpdateFlagForStackAllocatedObjects: Update the flags to handle a possibly +// stack-allocated object allocation. +// Arguments: +// flags - flags to update +// +// +// Notes: +// TODO-ObjectStackAllocation: This is a temporary conservative implementation.
+// Currently variables pointing to heap and/or stack allocated objects have type TYP_REF so we +// conservatively report them as INTERIOR. +// Ideally we should have the following types for object pointers: +// 1. TYP_I_IMPL for variables always pointing to stack-allocated objects (not reporting to GC) +// 2. TYP_REF for variables always pointing to heap-allocated objects (reporting as normal objects to GC) +// 3. TYP_BYREF for variables that may point to the stack or to the heap (reporting as interior objects to GC) + +void GCInfo::gcUpdateFlagForStackAllocatedObjects(GcSlotFlags& flags) +{ + if ((compiler->optMethodFlags & OMF_HAS_OBJSTACKALLOC) != 0) + { + flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); + } +} + #undef GCENCODER_WITH_LOGGING #endif // !JIT32_GCENCODER diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 70a5bf6d4f..99109db1e1 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -10773,7 +10773,14 @@ void Compiler::gtDispTree(GenTree* tree, switch (tree->gtOper) { case GT_FIELD: - printf(" %s", eeGetFieldName(tree->gtField.gtFldHnd), 0); + if (FieldSeqStore::IsPseudoField(tree->gtField.gtFldHnd)) + { + printf(" #PseudoField:0x%x", tree->gtField.gtFldOffset); + } + else + { + printf(" %s", eeGetFieldName(tree->gtField.gtFldHnd), 0); + } if (tree->gtField.gtFldObj && !topOnly) { diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h index 193e77c172..1d643e9c82 100644 --- a/src/jit/jitgcinfo.h +++ b/src/jit/jitgcinfo.h @@ -231,6 +231,9 @@ public: regPtrDsc* genStackPtrFirst, regPtrDsc* genStackPtrLast); + // Update the flags for a stack allocated object + void gcUpdateFlagForStackAllocatedObjects(GcSlotFlags& flags); + #endif #if MEASURE_PTRTAB_SIZE diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 3f81d40553..a8ffe7c859 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -1590,7 +1590,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE { if 
(!compiler->eeIsValueClass(typeHnd)) { - // TODO: Enable promotion of fields of stack-allocated objects. + // TODO-ObjectStackAllocation: Enable promotion of fields of stack-allocated objects. return false; } diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index bb04cdb302..6a98df9ea4 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -16738,10 +16738,14 @@ void Compiler::fgMorph() // local variable allocation on the stack. ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS +// TODO-ObjectStackAllocation: Enable the optimization for architectures using +// JIT32_GCENCODER (i.e., x86). +#ifndef JIT32_GCENCODER if (JitConfig.JitObjectStackAllocation() && !opts.MinOpts() && !opts.compDbgCode) { objectAllocator.EnableObjectStackAllocation(); } +#endif // JIT32_GCENCODER objectAllocator.Run(); diff --git a/src/jit/objectalloc.cpp b/src/jit/objectalloc.cpp index c4fdc11c53..00732ec750 100644 --- a/src/jit/objectalloc.cpp +++ b/src/jit/objectalloc.cpp @@ -16,57 +16,256 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma hdrstop #endif -//=============================================================================== +#include "gentree.h" //------------------------------------------------------------------------ // DoPhase: Run analysis (if object stack allocation is enabled) and then // morph each GT_ALLOCOBJ node either into an allocation helper // call or stack allocation. +// // Notes: // Runs only if Compiler::optMethodFlags has flag OMF_HAS_NEWOBJ set. 
+ void ObjectAllocator::DoPhase() { + JITDUMP("\n*** ObjectAllocationPhase: "); if ((comp->optMethodFlags & OMF_HAS_NEWOBJ) == 0) { + JITDUMP("no newobjs in this method; punting\n"); return; } if (IsObjectStackAllocationEnabled()) { + JITDUMP("enabled, analyzing...\n"); DoAnalysis(); } + else + { + JITDUMP("disabled, punting\n"); + } + + const bool didStackAllocate = MorphAllocObjNodes(); + + if (didStackAllocate) + { + RewriteUses(); + } +} + +//------------------------------------------------------------------------------ +// MarkLclVarAsEscaping : Mark local variable as escaping. +// +// +// Arguments: +// lclNum - Escaping pointing local variable number + +void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum) +{ + BitVecOps::AddElemD(&m_bitVecTraits, m_EscapingPointers, lclNum); +} + +//------------------------------------------------------------------------------ +// IsLclVarEscaping : Check if the local variable has been marked as escaping. +// +// +// Arguments: +// lclNum - Local variable number +// +// Return Value: +// True if the local variable has been marked as escaping; false otherwise + +bool ObjectAllocator::IsLclVarEscaping(unsigned int lclNum) +{ + return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, lclNum); +} - MorphAllocObjNodes(); +//------------------------------------------------------------------------------ +// AddConnGraphEdge : Record that the source local variable may point to the same set of objects +// as the set pointed to by target local variable. 
+// +// Arguments: +// sourceLclNum - Local variable number of the edge source +// targetLclNum - Local variable number of the edge target + +void ObjectAllocator::AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum) +{ + BitVecOps::AddElemD(&m_bitVecTraits, m_ConnGraphAdjacencyMatrix[sourceLclNum], targetLclNum); } //------------------------------------------------------------------------ // DoAnalysis: Walk over basic blocks of the method and detect all local // variables that can be allocated on the stack. -// -// Assumptions: -// Must be run after the dominators have been computed (we need this -// information to detect loops). + void ObjectAllocator::DoAnalysis() { assert(m_IsObjectStackAllocationEnabled); - assert(comp->fgDomsComputed); - // TODO-ObjectStackAllocation - NYI("DoAnalysis"); + assert(!m_AnalysisDone); + + if (comp->lvaCount > 0) + { + m_EscapingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits); + m_ConnGraphAdjacencyMatrix = new (comp->getAllocator(CMK_ObjectAllocator)) BitSetShortLongRep[comp->lvaCount]; + + MarkEscapingVarsAndBuildConnGraph(); + ComputeEscapingNodes(&m_bitVecTraits, m_EscapingPointers); + } + + m_AnalysisDone = true; +} + +//------------------------------------------------------------------------------ +// MarkEscapingVarsAndBuildConnGraph : Walk the trees of the method and mark any ref/byref/i_impl +// local variables that may escape. Build a connection graph +// for ref/by_ref/i_impl local variables. +// +// Arguments: +// None +// +// +// Notes: +// The connection graph has an edge from local variable s to local variable t if s may point +// to the objects t points to at some point in the method.
It's a simplified version +// of the graph described in this paper: +// https://www.cc.gatech.edu/~harrold/6340/cs6340_fall2009/Readings/choi99escape.pdf +// We currently don't have field edges and the edges we do have are called "deferred" in the paper. + +void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph() +{ + class BuildConnGraphVisitor final : public GenTreeVisitor + { + ObjectAllocator* m_allocator; + + public: + enum + { + DoPreOrder = true, + DoLclVarsOnly = true, + ComputeStack = true, + }; + + BuildConnGraphVisitor(ObjectAllocator* allocator) + : GenTreeVisitor(allocator->comp), m_allocator(allocator) + { + } + + Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* tree = *use; + assert(tree != nullptr); + assert(tree->IsLocal()); + + var_types type = tree->TypeGet(); + if ((tree->OperGet() == GT_LCL_VAR) && (type == TYP_REF || type == TYP_BYREF || type == TYP_I_IMPL)) + { + unsigned int lclNum = tree->AsLclVar()->GetLclNum(); + assert(tree == m_ancestors.Index(0)); + + if (m_allocator->CanLclVarEscapeViaParentStack(&m_ancestors, lclNum)) + { + if (!m_allocator->IsLclVarEscaping(lclNum)) + { + JITDUMP("V%02u first escapes via [%06u]\n", lclNum, m_compiler->dspTreeID(tree)); + } + m_allocator->MarkLclVarAsEscaping(lclNum); + } + } + return Compiler::fgWalkResult::WALK_CONTINUE; + } + }; + + for (unsigned int lclNum = 0; lclNum < comp->lvaCount; ++lclNum) + { + var_types type = comp->lvaTable[lclNum].TypeGet(); + + if (type == TYP_REF || type == TYP_I_IMPL || type == TYP_BYREF) + { + m_ConnGraphAdjacencyMatrix[lclNum] = BitVecOps::MakeEmpty(&m_bitVecTraits); + + if (comp->lvaTable[lclNum].lvAddrExposed) + { + JITDUMP(" V%02u is address exposed\n", lclNum); + MarkLclVarAsEscaping(lclNum); + } + } + else + { + // Variable that may not point to objects will not participate in our analysis. 
+ m_ConnGraphAdjacencyMatrix[lclNum] = BitVecOps::UninitVal(); + } + } + + BasicBlock* block; + + foreach_block(comp, block) + { + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) + { + BuildConnGraphVisitor buildConnGraphVisitor(this); + buildConnGraphVisitor.WalkTree(&stmt->gtStmtExpr, nullptr); + } + } +} + +//------------------------------------------------------------------------------ +// ComputeEscapingNodes : Given an initial set of escaping nodes, update it to contain the full set +// of escaping nodes by computing nodes reachable from the given set. +// +// Arguments: +// bitVecTraits - Bit vector traits +// escapingNodes [in/out] - Initial set of escaping nodes + +void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes) +{ + BitSetShortLongRep escapingNodesToProcess = BitVecOps::MakeCopy(bitVecTraits, escapingNodes); + BitSetShortLongRep newEscapingNodes = BitVecOps::UninitVal(); + + unsigned int lclNum; + + bool doOneMoreIteration = true; + while (doOneMoreIteration) + { + BitVecOps::Iter iterator(bitVecTraits, escapingNodesToProcess); + doOneMoreIteration = false; + + while (iterator.NextElem(&lclNum)) + { + doOneMoreIteration = true; + + // newEscapingNodes = adjacentNodes[lclNum] + BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclNum]); + // newEscapingNodes = newEscapingNodes \ escapingNodes + BitVecOps::DiffD(bitVecTraits, newEscapingNodes, escapingNodes); + // escapingNodesToProcess = escapingNodesToProcess U newEscapingNodes + BitVecOps::UnionD(bitVecTraits, escapingNodesToProcess, newEscapingNodes); + // escapingNodes = escapingNodes U newEscapingNodes + BitVecOps::UnionD(bitVecTraits, escapingNodes, newEscapingNodes); + // escapingNodesToProcess = escapingNodesToProcess \ { lclNum } + BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclNum); + } + } } //------------------------------------------------------------------------ // 
MorphAllocObjNodes: Morph each GT_ALLOCOBJ node either into an // allocation helper call or stack allocation. // +// Returns: +// true if any allocation was done as a stack allocation. +// // Notes: // Runs only over the blocks having bbFlags BBF_HAS_NEWOBJ set. -void ObjectAllocator::MorphAllocObjNodes() + +bool ObjectAllocator::MorphAllocObjNodes() { + bool didStackAllocate = false; + BasicBlock* block; foreach_block(comp, block) { - const bool basicBlockHasNewObj = (block->bbFlags & BBF_HAS_NEWOBJ) == BBF_HAS_NEWOBJ; + const bool basicBlockHasNewObj = (block->bbFlags & BBF_HAS_NEWOBJ) == BBF_HAS_NEWOBJ; + const bool basicBlockHasBackwardJump = (block->bbFlags & BBF_BACKWARD_JUMP) == BBF_BACKWARD_JUMP; #ifndef DEBUG if (!basicBlockHasNewObj) { @@ -96,10 +295,11 @@ void ObjectAllocator::MorphAllocObjNodes() assert(basicBlockHasNewObj); //------------------------------------------------------------------------ // We expect the following expression tree at this point - // * GT_STMT void (top level) - // | /--* GT_ALLOCOBJ ref - // \--* GT_ASG ref - // \--* GT_LCL_VAR ref + // * STMT void + // | /--* ALLOCOBJ ref + // | | \--* CNS_INT(h) long + // \--* ASG ref + // \--* LCL_VAR ref //------------------------------------------------------------------------ GenTree* op1 = stmtExpr->gtGetOp1(); @@ -109,15 +309,29 @@ void ObjectAllocator::MorphAllocObjNodes() assert(op2 != nullptr); assert(op2->OperGet() == GT_ALLOCOBJ); - GenTreeAllocObj* asAllocObj = op2->AsAllocObj(); - unsigned int lclNum = op1->AsLclVar()->GetLclNum(); + GenTreeAllocObj* asAllocObj = op2->AsAllocObj(); + unsigned int lclNum = op1->AsLclVar()->GetLclNum(); + CORINFO_CLASS_HANDLE clsHnd = op2->AsAllocObj()->gtAllocObjClsHnd; - if (IsObjectStackAllocationEnabled() && CanAllocateLclVarOnStack(lclNum)) + // Don't attempt to do stack allocations inside basic blocks that may be in a loop. 
+ if (IsObjectStackAllocationEnabled() && !basicBlockHasBackwardJump && + CanAllocateLclVarOnStack(lclNum, clsHnd)) { - op2 = MorphAllocObjNodeIntoStackAlloc(asAllocObj, block, stmt); + JITDUMP("Allocating local variable V%02u on the stack\n", lclNum); + + const unsigned int stackLclNum = MorphAllocObjNodeIntoStackAlloc(asAllocObj, block, stmt); + m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum); + stmt->gtStmtExpr->gtBashToNOP(); + comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC; + didStackAllocate = true; } else { + if (IsObjectStackAllocationEnabled()) + { + JITDUMP("Allocating local variable V%02u on the heap\n", lclNum); + } + op2 = MorphAllocObjNodeIntoHelperCall(asAllocObj); } @@ -125,6 +339,7 @@ void ObjectAllocator::MorphAllocObjNodes() stmtExpr->gtOp.gtOp2 = op2; stmtExpr->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT; } + #ifdef DEBUG else { @@ -135,6 +350,8 @@ void ObjectAllocator::MorphAllocObjNodes() #endif // DEBUG } } + + return didStackAllocate; } //------------------------------------------------------------------------ @@ -149,6 +366,7 @@ void ObjectAllocator::MorphAllocObjNodes() // // Notes: // Must update parents flags after this. + GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj) { assert(allocObj != nullptr); @@ -166,34 +384,233 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc // MorphAllocObjNodeIntoStackAlloc: Morph a GT_ALLOCOBJ node into stack // allocation. // Arguments: -// allocObj - GT_ALLOCOBJ that will be replaced by helper call. +// allocObj - GT_ALLOCOBJ that will be replaced by a stack allocation // block - a basic block where allocObj is // stmt - a statement where allocObj is // // Return Value: -// Address of tree doing stack allocation (can be the same as allocObj). +// local num for the new stack allocated local // // Notes: -// Must update parents flags after this. // This function can insert additional statements before stmt. 
-GenTree* ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, - BasicBlock* block, - GenTreeStmt* stmt) + +unsigned int ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, + BasicBlock* block, + GenTreeStmt* stmt) { assert(allocObj != nullptr); assert(m_AnalysisDone); - // TODO-StackAllocation - NYI("MorphAllocObjIntoStackAlloc"); + const bool shortLifetime = false; + const unsigned int lclNum = comp->lvaGrabTemp(shortLifetime DEBUGARG("MorphAllocObjNodeIntoStackAlloc temp")); + const int unsafeValueClsCheck = true; + comp->lvaSetStruct(lclNum, allocObj->gtAllocObjClsHnd, unsafeValueClsCheck); + + // Initialize the object memory if necessary + if (comp->fgStructTempNeedsExplicitZeroInit(comp->lvaTable + lclNum, block)) + { + unsigned int structSize = comp->lvaTable[lclNum].lvSize(); + + //------------------------------------------------------------------------ + // * STMT void + // | /--* CNS_INT int 0 + // \--* ASG struct (init) + // \--* LCL_VAR struct + //------------------------------------------------------------------------ + + GenTree* tree = comp->gtNewLclvNode(lclNum, TYP_STRUCT); + const bool isVolatile = false; + const bool isCopyBlock = false; + tree = comp->gtNewBlkOpNode(tree, comp->gtNewIconNode(0), structSize, isVolatile, isCopyBlock); + + GenTreeStmt* newStmt = comp->gtNewStmt(tree); + + comp->fgInsertStmtBefore(block, stmt, newStmt); + } + + //------------------------------------------------------------------------ + // * STMT void + // | /--* CNS_INT(h) long + // \--* ASG long + // \--* FIELD long #PseudoField:0x0 + // \--* ADDR byref + // \--* LCL_VAR struct + //------------------------------------------------------------------------ + + // Create a local representing the object + GenTree* tree = comp->gtNewLclvNode(lclNum, TYP_STRUCT); - return allocObj; + // Add a pseudo-field for the method table pointer and initialize it + tree = comp->gtNewOperNode(GT_ADDR, TYP_BYREF, tree); + tree = 
comp->gtNewFieldRef(TYP_I_IMPL, FieldSeqStore::FirstElemPseudoField, tree, 0); + tree = comp->gtNewAssignNode(tree, allocObj->gtGetOp1()); + + GenTreeStmt* newStmt = comp->gtNewStmt(tree); + + comp->fgInsertStmtBefore(block, stmt, newStmt); + + return lclNum; } -#ifdef DEBUG +//------------------------------------------------------------------------ +// CanLclVarEscapeViaParentStack: Check if the local variable escapes via the given parent stack. +// Update the connection graph as necessary. +// +// Arguments: +// parentStack - Parent stack of the current visit +// lclNum - Local variable number +// +// Return Value: +// true if the local can escape via the parent stack; false otherwise +// +// Notes: +// The method currently treats all locals assigned to a field as escaping. +// These can potentially be tracked by special field edges in the connection graph. + +bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parentStack, unsigned int lclNum) +{ + assert(parentStack != nullptr); + int parentIndex = 1; + + bool keepChecking = true; + bool canLclVarEscapeViaParentStack = true; + + while (keepChecking) + { + if (parentStack->Height() <= parentIndex) + { + canLclVarEscapeViaParentStack = false; + break; + } + + canLclVarEscapeViaParentStack = true; + GenTree* tree = parentStack->Index(parentIndex - 1); + GenTree* parent = parentStack->Index(parentIndex); + keepChecking = false; + + switch (parent->OperGet()) + { + case GT_ASG: + { + // Use the following conservative behavior for GT_ASG parent node: + // Consider local variable to be escaping if + // 1. lclVar appears on the rhs of a GT_ASG node + // AND + // 2. The lhs of the GT_ASG is not another lclVar + + GenTree* op1 = parent->AsOp()->gtGetOp1(); + + if (op1 == tree) + { + // Assigning to a local doesn't make it escaping. + // If there is another local variable on the rhs, + // we will update the connection graph when we visit it.
+ canLclVarEscapeViaParentStack = false; + } + else + { + // lclVar is on the rhs of GT_ASG node + assert(parent->AsOp()->gtGetOp2() == tree); + + // Update the connection graph if we are assigning to a local. + // For all other assignments we mark the rhs local as escaping. + // TODO-ObjectStackAllocation: track assignments to fields. + if (op1->OperGet() == GT_LCL_VAR) + { + // We expect the following tree at this point + // /--* GT_LCL_VAR ref rhsLclVar + // --* = ref + // \--* GT_LCL_VAR ref lhsLclVar + + // Add an edge to the connection graph. + const unsigned int lhsLclNum = op1->AsLclVar()->GetLclNum(); + const unsigned int rhsLclNum = lclNum; + + AddConnGraphEdge(lhsLclNum, rhsLclNum); + canLclVarEscapeViaParentStack = false; + } + } + break; + } + + case GT_EQ: + case GT_NE: + canLclVarEscapeViaParentStack = false; + break; + + case GT_COMMA: + if (parent->AsOp()->gtGetOp1() == parentStack->Index(parentIndex - 1)) + { + // Left child of GT_COMMA, it will be discarded + canLclVarEscapeViaParentStack = false; + break; + } + __fallthrough; + case GT_COLON: + case GT_QMARK: + case GT_ADD: + // Check whether the local escapes via its grandparent. + ++parentIndex; + keepChecking = true; + break; + + case GT_FIELD: + case GT_IND: + { + int grandParentIndex = parentIndex + 1; + if ((parentStack->Height() > grandParentIndex) && + (parentStack->Index(grandParentIndex)->OperGet() == GT_ADDR)) + { + // Check if the address of the field/ind escapes. + parentIndex += 2; + keepChecking = true; + } + else + { + // Address of the field/ind is not taken so the local doesn't escape. + canLclVarEscapeViaParentStack = false; + } + break; + } + + case GT_CALL: + { + GenTreeCall* asCall = parent->AsCall(); + if (asCall->gtCallType == CT_HELPER) + { + const CorInfoHelpFunc helperNum = comp->eeGetHelperNum(asCall->gtCallMethHnd); + + if (Compiler::s_helperCallProperties.IsPure(helperNum)) + { + // Pure helpers don't modify the heap. 
+ // TODO-ObjectStackAllocation: We may be able to special-case more helpers here. + canLclVarEscapeViaParentStack = false; + } + } + break; + } + + default: + break; + } + } + + return canLclVarEscapeViaParentStack; +} + +#ifdef DEBUG //------------------------------------------------------------------------ // AssertWhenAllocObjFoundVisitor: Look for a GT_ALLOCOBJ node and assert // when found one. +// +// Arguments: +// pTree - Tree to examine +// data - Walker data +// +// Return Value: +// Always returns fgWalkResult::WALK_CONTINUE + Compiler::fgWalkResult ObjectAllocator::AssertWhenAllocObjFoundVisitor(GenTree** pTree, Compiler::fgWalkData* data) { GenTree* tree = *pTree; @@ -206,4 +623,56 @@ Compiler::fgWalkResult ObjectAllocator::AssertWhenAllocObjFoundVisitor(GenTree** #endif // DEBUG -//=============================================================================== +//------------------------------------------------------------------------ +// RewriteUses: Find uses of the newobj temp for stack-allocated +// objects and replace with address of the stack local. 
+ +void ObjectAllocator::RewriteUses() +{ + class RewriteUsesVisitor final : public GenTreeVisitor + { + ObjectAllocator* m_allocator; + + public: + enum + { + DoPreOrder = true, + DoLclVarsOnly = true, + }; + + RewriteUsesVisitor(ObjectAllocator* allocator) + : GenTreeVisitor(allocator->comp), m_allocator(allocator) + { + } + + Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* tree = *use; + assert(tree != nullptr); + assert(tree->IsLocal()); + + const unsigned int lclNum = tree->AsLclVarCommon()->gtLclNum; + unsigned int newLclNum = BAD_VAR_NUM; + + if (m_allocator->m_HeapLocalToStackLocalMap.TryGetValue(lclNum, &newLclNum)) + { + GenTree* newTree = + m_compiler->gtNewOperNode(GT_ADDR, TYP_I_IMPL, m_compiler->gtNewLclvNode(newLclNum, TYP_STRUCT)); + *use = newTree; + } + + return Compiler::fgWalkResult::WALK_CONTINUE; + } + }; + + BasicBlock* block; + + foreach_block(comp, block) + { + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) + { + RewriteUsesVisitor rewriteUsesVisitor(this); + rewriteUsesVisitor.WalkTree(&stmt->gtStmtExpr, nullptr); + } + } +} diff --git a/src/jit/objectalloc.h b/src/jit/objectalloc.h index d6a859d83c..7be82ebb33 100644 --- a/src/jit/objectalloc.h +++ b/src/jit/objectalloc.h @@ -18,13 +18,21 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX //=============================================================================== #include "phase.h" +#include "smallhash.h" class ObjectAllocator final : public Phase { + typedef SmallHashTable LocalToLocalMap; + //=============================================================================== // Data members - bool m_IsObjectStackAllocationEnabled; - bool m_AnalysisDone; + bool m_IsObjectStackAllocationEnabled; + bool m_AnalysisDone; + BitVecTraits m_bitVecTraits; + BitVec m_EscapingPointers; + LocalToLocalMap m_HeapLocalToStackLocalMap; + BitSetShortLongRep* m_ConnGraphAdjacencyMatrix; + 
//=============================================================================== // Methods public: @@ -36,14 +44,24 @@ protected: virtual void DoPhase() override; private: - bool CanAllocateLclVarOnStack(unsigned int lclNum) const; - void DoAnalysis(); - void MorphAllocObjNodes(); + bool CanAllocateLclVarOnStack(unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd); + bool CanLclVarEscape(unsigned int lclNum); + void DoAnalysis(); + void MarkLclVarAsEscaping(unsigned int lclNum); + bool IsLclVarEscaping(unsigned int lclNum); + void MarkEscapingVarsAndBuildConnGraph(); + void AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum); + void ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes); + bool MorphAllocObjNodes(); + void RewriteUses(); GenTree* MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj); - GenTree* MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, BasicBlock* block, GenTreeStmt* stmt); + unsigned int MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, BasicBlock* block, GenTreeStmt* stmt); + struct BuildConnGraphVisitorCallbackData; + bool CanLclVarEscapeViaParentStack(ArrayStack* parentStack, unsigned int lclNum); #ifdef DEBUG static Compiler::fgWalkResult AssertWhenAllocObjFoundVisitor(GenTree** pTree, Compiler::fgWalkData* data); #endif // DEBUG + static const unsigned int s_StackAllocMaxSize = 0x2000U; }; //=============================================================================== @@ -52,32 +70,91 @@ inline ObjectAllocator::ObjectAllocator(Compiler* comp) : Phase(comp, "Allocate Objects", PHASE_ALLOCATE_OBJECTS) , m_IsObjectStackAllocationEnabled(false) , m_AnalysisDone(false) + , m_bitVecTraits(comp->lvaCount, comp) + , m_HeapLocalToStackLocalMap(comp->getAllocator()) { // Disable checks since this phase runs before fgComputePreds phase. // Checks are not expected to pass before fgComputePreds. 
- doChecks = false; + doChecks = false; + m_EscapingPointers = BitVecOps::UninitVal(); + m_ConnGraphAdjacencyMatrix = nullptr; } +//------------------------------------------------------------------------ +// IsObjectStackAllocationEnabled: Returns true iff object stack allocation is enabled +// +// Return Value: +// Returns true iff object stack allocation is enabled + inline bool ObjectAllocator::IsObjectStackAllocationEnabled() const { return m_IsObjectStackAllocationEnabled; } +//------------------------------------------------------------------------ +// EnableObjectStackAllocation: Enable object stack allocation. + inline void ObjectAllocator::EnableObjectStackAllocation() { m_IsObjectStackAllocationEnabled = true; } //------------------------------------------------------------------------ -// CanAllocateLclVarOnStack: Returns true iff local variable can not -// potentially escape from the method and -// can be allocated on the stack. -inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum) const +// CanAllocateLclVarOnStack: Returns true iff local variable can be +// allocated on the stack. +// +// Arguments: +// lclNum - Local variable number +// clsHnd - Class handle of the variable class +// +// Return Value: +// Returns true iff local variable can be allocated on the stack. +// +// Notes: +// Stack allocation of objects with gc fields and boxed objects is currently disabled. 
+ +inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd) +{ + assert(m_AnalysisDone); + + DWORD classAttribs = comp->info.compCompHnd->getClassAttribs(clsHnd); + + if ((classAttribs & CORINFO_FLG_CONTAINS_GC_PTR) != 0) + { + // TODO-ObjectStackAllocation: enable stack allocation of objects with gc fields + return false; + } + + if ((classAttribs & CORINFO_FLG_VALUECLASS) != 0) + { + // TODO-ObjectStackAllocation: enable stack allocation of boxed structs + return false; + } + + if (!comp->info.compCompHnd->canAllocateOnStack(clsHnd)) + { + return false; + } + + const unsigned int classSize = comp->info.compCompHnd->getHeapClassSize(clsHnd); + + return !CanLclVarEscape(lclNum) && (classSize <= s_StackAllocMaxSize); +} + +//------------------------------------------------------------------------ +// CanLclVarEscape: Returns true iff local variable can +// potentially escape from the method +// +// Arguments: +// lclNum - Local variable number +// +// Return Value: +// Returns true iff local variable can potentially escape from the method + +inline bool ObjectAllocator::CanLclVarEscape(unsigned int lclNum) { assert(m_AnalysisDone); - // TODO-ObjectStackAllocation - NYI("CanAllocateLclVarOnStack"); - return false; + return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, lclNum); } //=============================================================================== diff --git a/tests/issues.targets b/tests/issues.targets index c18349b3e8..b0f5098c27 100644 --- a/tests/issues.targets +++ b/tests/issues.targets @@ -312,6 +312,13 @@ + + + + Object stack allocation is not supported on x86 yet + + + diff --git a/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs b/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs new file mode 100644 index 0000000000..b01636802b --- /dev/null +++ b/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs @@ -0,0 +1,232 @@ +// Licensed 
to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Reflection; +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace ObjectStackAllocation +{ + class SimpleClassA + { + public int f1; + public int f2; + + public SimpleClassA(int f1, int f2) + { + this.f1 = f1; + this.f2 = f2; + } + } + + sealed class SimpleClassB + { + public long f1; + public long f2; + + public SimpleClassB(long f1, long f2) + { + this.f1 = f1; + this.f2 = f2; + } + } + + class SimpleClassWithGCField : SimpleClassA + { + public object o; + + public SimpleClassWithGCField(int f1, int f2, object o) : base(f1, f2) + { + this.o = o; + } + } + + class ClassWithNestedStruct + { + public ClassWithNestedStruct(int f1, int f2) + { + ns.f1 = f1; + ns.f2 = f2; + ns.s.f1 = f1; + ns.s.f2 = f2; + } + + public NestedStruct ns; + } + + struct SimpleStruct + { + public int f1; + public int f2; + } + + struct NestedStruct + { + public int f1; + public int f2; + public SimpleStruct s; + } + + class Tests + { + static volatile int f1 = 5; + static volatile int f2 = 7; + + delegate int Test(); + + static int methodResult = 100; + + public static int Main() + { + bool spcOptimizationEnabled = SPCOptimizationsEnabled(); + + CallTestAndVerifyAllocation(AllocateSimpleClassAndAddFields, 12, !spcOptimizationEnabled); + + CallTestAndVerifyAllocation(AllocateSimpleClassesAndEQCompareThem, 0, !spcOptimizationEnabled); + + CallTestAndVerifyAllocation(AllocateSimpleClassesAndNECompareThem, 1, !spcOptimizationEnabled); + + CallTestAndVerifyAllocation(AllocateSimpleClassAndCheckType, 1, !spcOptimizationEnabled); + + CallTestAndVerifyAllocation(AllocateSimpleClassAndCast, 7, !spcOptimizationEnabled); + + CallTestAndVerifyAllocation(AllocateSimpleClassAndGetField, 7, !spcOptimizationEnabled); + + 
CallTestAndVerifyAllocation(AllocateClassWithNestedStructAndGetField, 5, !spcOptimizationEnabled); + + CallTestAndVerifyAllocation(AllocateClassWithNestedStructAndAddFields, 24, !spcOptimizationEnabled); + + // Stack allocation of classes with GC fields is currently disabled + CallTestAndVerifyAllocation(AllocateSimpleClassWithGCFieldAndAddFields, 12, true); + + // Assigning class ref to a field of another object currently always disables stack allocation + CallTestAndVerifyAllocation(AllocateSimpleClassAndAssignRefToAField, 12, true); + + // Stack allocation of boxed structs is currently disabled + CallTestAndVerifyAllocation(BoxSimpleStructAndAddFields, 12, true); + + return methodResult; + } + + static bool SPCOptimizationsEnabled() + { + Assembly objectAssembly = Assembly.GetAssembly(typeof(object)); + object[] attribs = objectAssembly.GetCustomAttributes(typeof(DebuggableAttribute), + false); + DebuggableAttribute debuggableAttribute = attribs[0] as DebuggableAttribute; + return ((debuggableAttribute == null) || !debuggableAttribute.IsJITOptimizerDisabled); + } + + static void CallTestAndVerifyAllocation(Test test, int expectedResult, bool expectHeapAllocations) + { + long allocatedBytesBefore = GC.GetAllocatedBytesForCurrentThread(); + int testResult = test(); + long allocatedBytesAfter = GC.GetAllocatedBytesForCurrentThread(); + string methodName = test.Method.Name; + + if (testResult != expectedResult) { + Console.WriteLine($"FAILURE ({methodName}): expected {expectedResult}, got {testResult}"); + methodResult = -1; + } + else if (!expectHeapAllocations && (allocatedBytesBefore != allocatedBytesAfter)) { + Console.WriteLine($"FAILURE ({methodName}): unexpected allocation of {allocatedBytesAfter - allocatedBytesBefore} bytes"); + methodResult = -1; + } + else if (expectHeapAllocations && (allocatedBytesBefore == allocatedBytesAfter)) { + Console.WriteLine($"FAILURE ({methodName}): unexpected stack allocation"); + methodResult = -1; + } + else { + 
Console.WriteLine($"SUCCESS ({methodName})"); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateSimpleClassAndAddFields() + { + SimpleClassA a = new SimpleClassA(f1, f2); + GC.Collect(); + return a.f1 + a.f2; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateSimpleClassesAndEQCompareThem() + { + SimpleClassA a1 = new SimpleClassA(f1, f2); + SimpleClassA a2 = (f1 == 0) ? a1 : new SimpleClassA(f2, f1); + return (a1 == a2) ? 1 : 0; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateSimpleClassesAndNECompareThem() + { + SimpleClassA a1 = new SimpleClassA(f1, f2); + SimpleClassA a2 = (f1 == 0) ? a1 : new SimpleClassA(f2, f1); + return (a1 != a2) ? 1 : 0; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateSimpleClassAndCheckType() + { + object o = (f1 == 0) ? (object)new SimpleClassB(f1, f2) : (object)new SimpleClassA(f1, f2); + return (o is SimpleClassB) || !(o is SimpleClassA) ? 0 : 1; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateSimpleClassAndCast() + { + object o = (f1 == 0) ? 
(object)new SimpleClassB(f1, f2) : (object)new SimpleClassA(f2, f1); + return ((SimpleClassA)o).f1; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateSimpleClassAndGetField() + { + SimpleClassA a = new SimpleClassA(f1, f2); + ref int f = ref a.f2; + return f; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateClassWithNestedStructAndGetField() + { + ClassWithNestedStruct c = new ClassWithNestedStruct(f1, f2); + ref int f = ref c.ns.s.f1; + return f; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateClassWithNestedStructAndAddFields() + { + ClassWithNestedStruct c = new ClassWithNestedStruct(f1, f2); + return c.ns.f1 + c.ns.f2 + c.ns.s.f1 + c.ns.s.f2; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateSimpleClassWithGCFieldAndAddFields() + { + SimpleClassWithGCField c = new SimpleClassWithGCField(f1, f2, null); + return c.f1 + c.f2; + } + + static int AllocateSimpleClassAndAssignRefToAField() + { + SimpleClassWithGCField c = new SimpleClassWithGCField(f1, f2, null); + SimpleClassA a = new SimpleClassA(f1, f2); + c.o = a; + return c.f1 + c.f2; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int BoxSimpleStructAndAddFields() + { + SimpleStruct str; + str.f1 = f1; + str.f2 = f2; + object boxedSimpleStruct = (object)str; + return ((SimpleStruct)boxedSimpleStruct).f1 + ((SimpleStruct)boxedSimpleStruct).f2; + } + } +} diff --git a/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.csproj b/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.csproj new file mode 100644 index 0000000000..280c9912f0 --- /dev/null +++ b/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.csproj @@ -0,0 +1,54 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + + + + + + + False + + + + None + True + + + + + + + 
+ + + + + + + + + + + -- cgit v1.2.3