summaryrefslogtreecommitdiff
path: root/src/jit/loopcloning.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/jit/loopcloning.h')
-rw-r--r--src/jit/loopcloning.h667
1 files changed, 667 insertions, 0 deletions
diff --git a/src/jit/loopcloning.h b/src/jit/loopcloning.h
new file mode 100644
index 0000000000..40793afcf1
--- /dev/null
+++ b/src/jit/loopcloning.h
@@ -0,0 +1,667 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LoopCloning XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+ Loop cloning optimizations comprise of the following steps:
+ - Loop detection logic which is existing logic in the JIT that records
+ loop information with loop flags.
+ - The next step is to identify loop optimization candidates. This is done
+ by optObtainLoopCloningOpts. The loop context variable is updated with
+ all the necessary information (for ex: block, stmt, tree information)
+ to do the optimization later.
+ a) This involves checking if the loop is well-formed with respect to
+ the optimization being performed.
+ b) In array bounds check case, reconstructing the morphed GT_INDEX
+ nodes back to their array representation.
+ i) The array index is stored in the "context" variable with
+ additional block, tree, stmt info.
+ - Once the optimization candidates are identified, we derive cloning conditions
+ For ex: to clone a simple "for (i=0; i<n; ++i) { a[i] }" loop, we need the
+ following conditions:
+ (a != null) && ((n >= 0) & (n <= a.length) & (stride > 0))
+ a) Note the short circuit AND for (a != null). These are called block
+ conditions or deref-conditions since these conditions need to be in their
+ own blocks to be able to short-circuit.
+ i) For a doubly nested loop on i, j, we would then have
+ conditions like
+ (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len)
+ all short-circuiting creating blocks.
+
+ Advantage:
+ All conditions are checked before we enter the fast path. So fast
+ path gets as fast as it can be.
+
+ Disadvantage:
+ Creation of blocks.
+
+ Heuristic:
+ Therefore we will not clone if we exceed creating 4 blocks.
+
+ b) The other conditions called cloning conditions are transformed into LC_Condition
+ structs which are then optimized.
+ i) Optimization of conditions involves removing redundant condition checks.
+ ii) If some conditions evaluate to true statically, then they are removed.
+ iii) If any condition evaluates to false statically, then loop cloning is
+ aborted for that loop.
+ - Then the block splitting occurs and loop cloning conditions is transformed into
+ GenTree and added to the loop cloning choice block.
+
+ Preconditions
+ - Loop detection should have completed and the loop table should be
+ populated with the loop dscs.
+ - The loops that will be considered are the ones with the LPFLG_ITER
+ marked on them.
+
+ Limitations
+ - For array based optimizations the loop choice condition is checked
+ before the loop body. This implies that the loop initializer statement
+ has not executed at the time of the check. So any loop cloning condition
+ involving the initial value of the loop counter cannot be condition checked
+ as it hasn't been assigned yet at the time of condition checking. Therefore
+ the initial value has to be statically known. This can be fixed with further
+ effort.
+
+ Assumption
+ - The assumption is that the optimization candidates collected during the
+ identification phase will be the ones that will be optimized. In other words,
+ the loop that is present originally will be the fast path. Explicitly, the cloned
+ path will be the slow path and will be unoptimized. This allows us to
+ collect additional information at the same time of identifying the optimization
+ candidates. This later helps us to perform the optimizations during actual cloning.
+ - All loop cloning choice conditions will automatically be "AND"-ed. These are
+ bitwise AND operations.
+ - Perform short circuit AND for (array != null) side effect check
+ before hoisting (limit <= a.length) check.
+ For ex: to clone a simple "for (i=0; i<n; ++i) { a[i] }" loop, we need the
+ following conditions:
+ (a != null) && ((n >= 0) & (n <= a.length) & (stride > 0))
+
+*/
+#pragma once
+
+class Compiler;
+
+/**
+ *
+ * Represents an array access and associated bounds checks.
+ * Array access is required have the array and indices in local variables.
+ * This struct is constructed using a GT_INDEX node that is broken into
+ * its sub trees.
+ *
+ */
+struct ArrIndex
+{
+ unsigned arrLcl; // The array base local num
+ ExpandArrayStack<unsigned> indLcls; // The indices local nums
+ ExpandArrayStack<GenTree*> bndsChks; // The bounds checks nodes along each dimension.
+ unsigned rank; // Rank of the array
+ BasicBlock* useBlock; // Block where the [] occurs
+
+ ArrIndex(IAllocator* alloc) : arrLcl(BAD_VAR_NUM), indLcls(alloc), bndsChks(alloc), rank(0), useBlock(nullptr)
+ {
+ }
+
+#ifdef DEBUG
+ void Print(unsigned dim = -1)
+ {
+ printf("V%02d", arrLcl);
+ for (unsigned i = 0; i < ((dim == -1) ? rank : dim); ++i)
+ {
+ printf("[V%02d]", indLcls.GetRef(i));
+ }
+ }
+#endif
+};
+
+// Forward declarations
+#define LC_OPT(en) struct en##OptInfo;
+#include "loopcloningopts.h"
+
+/**
+ *
+ * LcOptInfo represents the optimization information for loop cloning,
+ * other classes are supposed to derive from this base class.
+ *
+ * Example usage:
+ * LcMdArrayOptInfo is multi-dimensional array optimization for which the
+ * loop can be cloned.
+ * LcArrIndexOptInfo is a jagged array optimization for which the loop
+ * can be cloned.
+ *
+ * So LcOptInfo represents any type of optimization opportunity that
+ * occurs in a loop and the metadata for the optimization is stored in
+ * this class.
+ */
+struct LcOptInfo
+{
+ enum OptType
+ {
+#undef LC_OPT
+#define LC_OPT(en) en,
+#include "loopcloningopts.h"
+ };
+
+ void* optInfo;
+ OptType optType;
+ LcOptInfo(void* optInfo, OptType optType) : optInfo(optInfo), optType(optType)
+ {
+ }
+
+ OptType GetOptType()
+ {
+ return optType;
+ }
+#undef LC_OPT
+#define LC_OPT(en) \
+ en##OptInfo* As##en##OptInfo() \
+ { \
+ assert(optType == en); \
+ return reinterpret_cast<en##OptInfo*>(this); \
+ }
+#include "loopcloningopts.h"
+};
+
+/**
+ *
+ * Optimization info for a multi-dimensional array.
+ */
+struct LcMdArrayOptInfo : public LcOptInfo
+{
+ GenTreeArrElem* arrElem; // "arrElem" node of an MD array.
+ unsigned dim; // "dim" represents upto what level of the rank this optimization applies to.
+ // For example, a[i,j,k] could be the MD array "arrElem" but if "dim" is 2,
+ // then this node is treated as though it were a[i,j]
+ ArrIndex* index; // "index" cached computation in the form of an ArrIndex representation.
+
+ LcMdArrayOptInfo(GenTreeArrElem* arrElem, unsigned dim)
+ : LcOptInfo(this, LcMdArray), arrElem(arrElem), dim(dim), index(nullptr)
+ {
+ }
+
+ ArrIndex* GetArrIndexForDim(IAllocator* alloc)
+ {
+ if (index == nullptr)
+ {
+ index = new (alloc) ArrIndex(alloc);
+ index->rank = arrElem->gtArrRank;
+ for (unsigned i = 0; i < dim; ++i)
+ {
+ index->indLcls.Push(arrElem->gtArrInds[i]->gtLclVarCommon.gtLclNum);
+ }
+ index->arrLcl = arrElem->gtArrObj->gtLclVarCommon.gtLclNum;
+ }
+ return index;
+ }
+};
+
+/**
+ *
+ * Optimization info for a jagged array.
+ */
+struct LcJaggedArrayOptInfo : public LcOptInfo
+{
+ unsigned dim; // "dim" represents upto what level of the rank this optimization applies to.
+ // For example, a[i][j][k] could be the jagged array but if "dim" is 2,
+ // then this node is treated as though it were a[i][j]
+ ArrIndex arrIndex; // ArrIndex representation of the array.
+ GenTreePtr stmt; // "stmt" where the optimization opportunity occurs.
+
+ LcJaggedArrayOptInfo(ArrIndex& arrIndex, unsigned dim, GenTreePtr stmt)
+ : LcOptInfo(this, LcJaggedArray), dim(dim), arrIndex(arrIndex), stmt(stmt)
+ {
+ }
+};
+
+/**
+ *
+ * Symbolic representation of a.length, or a[i][j].length or a[i,j].length and so on.
+ * OperType decides whether "arrLength" is invoked on the array or if it is just an array.
+ */
+struct LC_Array
+{
+ enum ArrType
+ {
+ Invalid,
+ Jagged,
+ MdArray
+ };
+
+ enum OperType
+ {
+ None,
+ ArrLen,
+ };
+
+ ArrType type; // The type of the array on which to invoke length operator.
+ ArrIndex* arrIndex; // ArrIndex representation of this array.
+
+ OperType oper;
+
+#ifdef DEBUG
+ void Print()
+ {
+ arrIndex->Print(dim);
+ if (oper == ArrLen)
+ {
+ printf(".Length");
+ }
+ }
+#endif
+
+ int dim; // "dim" = which index to invoke arrLen on, if -1 invoke on the whole array
+ // Example 1: a[0][1][2] and dim = 2 implies a[0][1].length
+ // Example 2: a[0][1][2] and dim = -1 implies a[0][1][2].length
+ LC_Array() : type(Invalid), dim(-1)
+ {
+ }
+ LC_Array(ArrType type, ArrIndex* arrIndex, int dim, OperType oper)
+ : type(type), arrIndex(arrIndex), oper(oper), dim(dim)
+ {
+ }
+
+ LC_Array(ArrType type, ArrIndex* arrIndex, OperType oper) : type(type), arrIndex(arrIndex), oper(oper), dim(-1)
+ {
+ }
+
+ // Equality operator
+ bool operator==(const LC_Array& that) const
+ {
+ assert(type != Invalid && that.type != Invalid);
+
+ // Types match and the array base matches.
+ if (type != that.type || arrIndex->arrLcl != that.arrIndex->arrLcl || oper != that.oper)
+ {
+ return false;
+ }
+
+ // If the dim ranks are not matching, quit.
+ int rank1 = GetDimRank();
+ int rank2 = that.GetDimRank();
+ if (rank1 != rank2)
+ {
+ return false;
+ }
+
+ // Check for the indices.
+ for (int i = 0; i < rank1; ++i)
+ {
+ if (arrIndex->indLcls[i] != that.arrIndex->indLcls[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // The max dim on which length is invoked.
+ int GetDimRank() const
+ {
+ return (dim < 0) ? (int)arrIndex->rank : dim;
+ }
+
+ // Get a tree representation for this symbolic a.length
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of either a constant like 1, 2 or a variable V02, V03 etc. or an "LC_Array" or the null
+ * constant.
+ */
+struct LC_Ident
+{
+ enum IdentType
+ {
+ Invalid,
+ Const,
+ Var,
+ ArrLen,
+ Null,
+ };
+
+ INT64 constant; // The constant value if this node is of type "Const", or the lcl num if "Var"
+ LC_Array arrLen; // The LC_Array if the type is "ArrLen"
+ IdentType type; // The type of this object
+
+ // Equality operator
+ bool operator==(const LC_Ident& that) const
+ {
+ switch (type)
+ {
+ case Const:
+ case Var:
+ return (type == that.type) && constant == that.constant;
+ case ArrLen:
+ return (type == that.type) && (arrLen == that.arrLen);
+ case Null:
+ return (type == that.type);
+ default:
+ assert(!"Unknown LC_Ident type");
+ unreached();
+ }
+ }
+
+#ifdef DEBUG
+ void Print()
+ {
+ switch (type)
+ {
+ case Const:
+ printf("%I64d", constant);
+ break;
+ case Var:
+ printf("V%02d", constant);
+ break;
+ case ArrLen:
+ arrLen.Print();
+ break;
+ case Null:
+ printf("null");
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ }
+#endif
+
+ LC_Ident() : type(Invalid)
+ {
+ }
+ LC_Ident(INT64 constant, IdentType type) : constant(constant), type(type)
+ {
+ }
+ explicit LC_Ident(IdentType type) : type(type)
+ {
+ }
+ explicit LC_Ident(const LC_Array& arrLen) : arrLen(arrLen), type(ArrLen)
+ {
+ }
+
+ // Convert this symbolic representation into a tree node.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of an expr that involves an "LC_Ident" or an "LC_Ident - constant"
+ */
+struct LC_Expr
+{
+ enum ExprType
+ {
+ Invalid,
+ Ident,
+ IdentPlusConst
+ };
+
+ LC_Ident ident;
+ INT64 constant;
+ ExprType type;
+
+ // Equality operator
+ bool operator==(const LC_Expr& that) const
+ {
+ assert(type != Invalid && that.type != Invalid);
+
+ // If the types don't match quit.
+ if (type != that.type)
+ {
+ return false;
+ }
+
+ // If the type involves arithmetic, the constant should match.
+ if (type == IdentPlusConst && constant != that.constant)
+ {
+ return false;
+ }
+
+ // Check if the ident match.
+ return (ident == that.ident);
+ }
+
+#ifdef DEBUG
+ void Print()
+ {
+ if (type == IdentPlusConst)
+ {
+ printf("(%I64d - ", constant);
+ ident.Print();
+ printf(")");
+ }
+ else
+ {
+ ident.Print();
+ }
+ }
+#endif
+
+ LC_Expr() : type(Invalid)
+ {
+ }
+ explicit LC_Expr(const LC_Ident& ident) : ident(ident), type(Ident)
+ {
+ }
+ LC_Expr(const LC_Ident& ident, INT64 constant) : ident(ident), constant(constant), type(IdentPlusConst)
+ {
+ }
+
+ // Convert LC_Expr into a tree node.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of a conditional operation involving two "LC_Expr":
+ * LC_Expr < LC_Expr, for example: i > 0, i < a.length
+ */
+struct LC_Condition
+{
+ LC_Expr op1;
+ LC_Expr op2;
+ genTreeOps oper;
+
+#ifdef DEBUG
+ void Print()
+ {
+ op1.Print();
+ printf(" %s ", GenTree::NodeName(oper));
+ op2.Print();
+ }
+#endif
+
+ // Check if the condition evaluates statically to true or false, i < i => false, a.length > 0 => true
+ // The result is put in "pResult" parameter and is valid if the method returns "true". Otherwise, the
+ // condition could not be evaluated.
+ bool Evaluates(bool* pResult);
+
+ // Check if two conditions can be combined to yield one condition.
+ bool Combines(const LC_Condition& cond, LC_Condition* newCond);
+
+ LC_Condition()
+ {
+ }
+ LC_Condition(genTreeOps oper, const LC_Expr& op1, const LC_Expr& op2) : op1(op1), op2(op2), oper(oper)
+ {
+ }
+
+ // Convert this conditional operation into a GenTree.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ * A deref tree of an array expression.
+ * a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop, then, the tree would be:
+ * a => {
+ * i => {
+ * j => {
+ * k => {}
+ * },
+ * y => {
+ * k => {}
+ * },
+ * }
+ * },
+ * b => {
+ * i => {}
+ * }
+ */
+struct LC_Deref
+{
+ const LC_Array array;
+ ExpandArrayStack<LC_Deref*>* children;
+
+ unsigned level;
+
+ LC_Deref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level)
+ {
+ }
+
+ LC_Deref* Find(unsigned lcl);
+
+ unsigned Lcl();
+
+ bool HasChildren();
+ void EnsureChildren(IAllocator* alloc);
+ static LC_Deref* Find(ExpandArrayStack<LC_Deref*>* children, unsigned lcl);
+
+ void DeriveLevelConditions(ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* len);
+#ifdef DEBUG
+ void Print(unsigned indent = 0)
+ {
+ unsigned tab = 4 * indent;
+ printf("%*s%d,%d => {", tab, "", Lcl(), level);
+ if (children != nullptr)
+ {
+ for (unsigned i = 0; i < children->Size(); ++i)
+ {
+ if (i > 0)
+ {
+ printf(",");
+ }
+ printf("\n");
+#ifdef _MSC_VER
+ (*children)[i]->Print(indent + 1);
+#else // _MSC_VER
+ (*((ExpandArray<LC_Deref*>*)children))[i]->Print(indent + 1);
+#endif // _MSC_VER
+ }
+ }
+ printf("\n%*s}", tab, "");
+ }
+#endif
+};
+
+/**
+ *
+ * The "context" represents data that is used for making loop-cloning decisions.
+ * - The data is the collection of optimization opportunities
+ * - and the conditions (LC_Condition) that decide between the fast
+ * path or the slow path.
+ *
+ * BNF for LC_Condition:
+ * LC_Condition : LC_Expr genTreeOps LC_Expr
+ * LC_Expr : LC_Ident | LC_Ident + Constant
+ * LC_Ident : Constant | Var | LC_Array
+ * LC_Array : .
+ * genTreeOps : GT_GE | GT_LE | GT_GT | GT_LT
+ *
+ */
+struct LoopCloneContext
+{
+ IAllocator* alloc; // The allocator
+ ExpandArrayStack<LcOptInfo*>** optInfo; // The array of optimization opportunities found in each loop. (loop x
+ // optimization-opportunities)
+ ExpandArrayStack<LC_Condition>** conditions; // The array of conditions that influence which path to take for each
+ // loop. (loop x cloning-conditions)
+ ExpandArrayStack<LC_Array>** derefs; // The array of dereference conditions found in each loop. (loop x
+ // deref-conditions)
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>** blockConditions; // The array of block levels of conditions for
+ // each loop. (loop x level x conditions)
+
+ LoopCloneContext(unsigned loopCount, IAllocator* alloc) : alloc(alloc)
+ {
+ optInfo = new (alloc) ExpandArrayStack<LcOptInfo*>*[loopCount];
+ conditions = new (alloc) ExpandArrayStack<LC_Condition>*[loopCount];
+ derefs = new (alloc) ExpandArrayStack<LC_Array>*[loopCount];
+ blockConditions = new (alloc) ExpandArrayStack<ExpandArrayStack<LC_Condition>*>*[loopCount];
+ for (unsigned i = 0; i < loopCount; ++i)
+ {
+ optInfo[i] = nullptr;
+ conditions[i] = nullptr;
+ derefs[i] = nullptr;
+ blockConditions[i] = nullptr;
+ }
+ }
+
+ // Evaluate conditions into a JTRUE stmt and put it in the block. Reverse condition if 'reverse' is true.
+ void CondToStmtInBlock(Compiler* comp, ExpandArrayStack<LC_Condition>& conds, BasicBlock* block, bool reverse);
+
+ // Get all the optimization information for loop "loopNum"; This information is held in "optInfo" array.
+ // If NULL this allocates the optInfo[loopNum] array for "loopNum"
+ ExpandArrayStack<LcOptInfo*>* EnsureLoopOptInfo(unsigned loopNum);
+
+ // Get all the optimization information for loop "loopNum"; This information is held in "optInfo" array.
+ // If NULL this does not allocate the optInfo[loopNum] array for "loopNum"
+ ExpandArrayStack<LcOptInfo*>* GetLoopOptInfo(unsigned loopNum);
+
+ // Cancel all optimizations for loop "loopNum" by clearing out the "conditions" member if non-null
+ // and setting the optInfo to "null.", If "null", then the user of this class is not supposed to
+ // clone this loop.
+ void CancelLoopOptInfo(unsigned loopNum);
+
+ // Get the conditions that decide which loop to take for "loopNum." If NULL allocate an empty array.
+ ExpandArrayStack<LC_Condition>* EnsureConditions(unsigned loopNum);
+
+ // Get the conditions for loop. No allocation is performed.
+ ExpandArrayStack<LC_Condition>* GetConditions(unsigned loopNum);
+
+ // Ensure that the "deref" conditions array is allocated.
+ ExpandArrayStack<LC_Array>* EnsureDerefs(unsigned loopNum);
+
+ // Get block conditions for each loop, no allocation is performed.
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* GetBlockConditions(unsigned loopNum);
+
+ // Ensure that the block condition is present, if not allocate space.
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* EnsureBlockConditions(unsigned loopNum, unsigned totalBlocks);
+
+ // Print the block conditions for the loop.
+ void PrintBlockConditions(unsigned loopNum);
+
+ // Does the loop have block conditions?
+ bool HasBlockConditions(unsigned loopNum);
+
+ // Evaluate the conditions for "loopNum" and indicate if they are either all true or any of them are false.
+ // "pAllTrue" implies all the conditions are statically known to be true.
+ // "pAnyFalse" implies at least one condition is statically known to be false.
+ // If neither of them are true, then some conditions' evaluations are statically unknown.
+ //
+ // If all conditions yield true, then the caller doesn't need to clone the loop, but it can perform
+ // fast path optimizations.
+ // If any condition yields false, then the caller needs to abort cloning the loop (neither clone nor
+ // fast path optimizations.)
+ //
+ // Assumes the conditions involve an AND join operator.
+ void EvaluateConditions(unsigned loopNum, bool* pAllTrue, bool* pAnyFalse DEBUGARG(bool verbose));
+
+private:
+ void OptimizeConditions(ExpandArrayStack<LC_Condition>& conds);
+
+public:
+ // Optimize conditions to remove redundant conditions.
+ void OptimizeConditions(unsigned loopNum DEBUGARG(bool verbose));
+
+ void OptimizeBlockConditions(unsigned loopNum DEBUGARG(bool verbose));
+
+#ifdef DEBUG
+ void PrintConditions(unsigned loopNum);
+#endif
+};