Add LoopUtils class to gather some loop transformation support.

This patch adds LoopUtils class to handle some loop related transformations. For now it has 2 transformations that simplifies other transformations such as loop unroll or unswitch: - Dedicate exit blocks: this ensure that all exit basic block (out-of-loop basic blocks that have a predecessor in the loop) have all their predecessors in the loop; - Loop Closed SSA (LCSSA): this ensure that all definitions in a loop are used inside the loop or in a phi instruction in an exit basic block. It also adds the following capabilities: - Loop::IsLCSSA to test if the loop is in a LCSSA form - Loop::GetOrCreatePreHeaderBlock that can build a loop preheader if required; - New methods to allow on the fly updates of the loop descriptors. - New methods to allow on the fly updates of the CFG analysis. - Instruction::SetOperand to allow expression of the index relative to Instruction::NumOperands (to be compatible with the index returned by DefUseManager::ForEachUse)
author: Victor Lomuller <victor@codeplay.com> 2018-01-26 12:07:10 +0000
committer: Steven Perron <stevenperron@google.com> 2018-02-01 15:35:09 -0500
commit: 50e85c865ca9c4b53e2724f36a84fb2566c1ce97 (patch)
tree: 28907f9d378f13af982e73ecdbe15ad529afa506 /source/opt/loop_utils.cpp
parent: 61d8c0384b8400e5a88febb457611d22e36a76c8 (diff)
download: SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.tar.gz
SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.tar.bz2
SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.zip
1 files changed, 485 insertions, 0 deletions
diff --git a/source/opt/loop_utils.cpp b/source/opt/loop_utils.cpp
new file mode 100644
index 00000000..86bc2eb5
--- /dev/null
+++ b/source/opt/loop_utils.cpp
@@ -0,0 +1,485 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "opt/cfg.h"
+#include "opt/ir_builder.h"
+#include "opt/ir_context.h"
+#include "opt/loop_descriptor.h"
+#include "opt/loop_utils.h"
+
+namespace spvtools {
+namespace opt {
+
+namespace {
+// Return true if |bb| is dominated by at least one block in |exits|
+static inline bool DominatesAnExit(
+    ir::BasicBlock* bb, const std::unordered_set<ir::BasicBlock*>& exits,
+    const opt::DominatorTree& dom_tree) {
+  for (ir::BasicBlock* e_bb : exits)
+    if (dom_tree.Dominates(bb, e_bb)) return true;
+  return false;
+}
+
+// Utility class to rewrite out-of-loop uses of an in-loop definition in terms
+// of phi instructions to achieve a LCSSA form.
+// For a given definition, the class user registers phi instructions using that
+// definition in all loop exit blocks by which the definition escapes.
+// Then, when rewriting a use of the definition, the rewriter walks the
+// paths from the use the loop exits. At each step, it will insert a phi
+// instruction to merge the incoming value according to exit blocks definition.
+class LCSSARewriter {
+ public:
+  LCSSARewriter(ir::IRContext* context, const opt::DominatorTree& dom_tree,
+                const std::unordered_set<ir::BasicBlock*>& exit_bb,
+                ir::BasicBlock* merge_block)
+      : context_(context),
+        cfg_(context_->cfg()),
+        dom_tree_(dom_tree),
+        exit_bb_(exit_bb),
+        merge_block_id_(merge_block ? merge_block->id() : 0) {}
+
+  struct UseRewriter {
+    explicit UseRewriter(LCSSARewriter* base, const ir::Instruction& def_insn)
+        : base_(base), def_insn_(def_insn) {}
+    // Rewrites the use of |def_insn_| by the instruction |user| at the index
+    // |operand_index| in terms of phi instruction. This recursively builds new
+    // phi instructions from |user| to the loop exit blocks' phis. The use of
+    // |def_insn_| in |user| is replaced by the relevant phi instruction at the
+    // end of the operation.
+    // It is assumed that |user| does not dominates any of the loop exit basic
+    // block. This operation does not update the def/use manager, instead it
+    // records what needs to be updated. The actual update is performed by
+    // UpdateManagers.
+    void RewriteUse(ir::BasicBlock* bb, ir::Instruction* user,
+                    uint32_t operand_index) {
+      assert(
+          (user->opcode() != SpvOpPhi || bb != GetParent(user)) &&
+          "The root basic block must be the incoming edge if |user| is a phi "
+          "instruction");
+      assert((user->opcode() == SpvOpPhi || bb == GetParent(user)) &&
+             "The root basic block must be the instruction parent if |user| is "
+             "not "
+             "phi instruction");
+
+      ir::Instruction* new_def = GetOrBuildIncoming(bb->id());
+
+      user->SetOperand(operand_index, {new_def->result_id()});
+      rewritten_.insert(user);
+    }
+
+    // In-place update of some managers (avoid full invalidation).
+    inline void UpdateManagers() {
+      opt::analysis::DefUseManager* def_use_mgr =
+          base_->context_->get_def_use_mgr();
+      // Register all new definitions.
+      for (ir::Instruction* insn : rewritten_) {
+        def_use_mgr->AnalyzeInstDef(insn);
+      }
+      // Register all new uses.
+      for (ir::Instruction* insn : rewritten_) {
+        def_use_mgr->AnalyzeInstUse(insn);
+      }
+    }
+
+   private:
+    // Return the basic block that |instr| belongs to.
+    ir::BasicBlock* GetParent(ir::Instruction* instr) {
+      return base_->context_->get_instr_block(instr);
+    }
+
+    // Builds a phi instruction for the basic block |bb|. The function assumes
+    // that |defining_blocks| contains the list of basic block that define the
+    // usable value for each predecessor of |bb|.
+    inline ir::Instruction* CreatePhiInstruction(
+        ir::BasicBlock* bb, const std::vector<uint32_t>& defining_blocks) {
+      std::vector<uint32_t> incomings;
+      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
+      assert(bb_preds.size() == defining_blocks.size());
+      for (size_t i = 0; i < bb_preds.size(); i++) {
+        incomings.push_back(
+            GetOrBuildIncoming(defining_blocks[i])->result_id());
+        incomings.push_back(bb_preds[i]);
+      }
+      opt::InstructionBuilder builder(
+          base_->context_, &*bb->begin(),
+          ir::IRContext::kAnalysisInstrToBlockMapping);
+      ir::Instruction* incoming_phi =
+          builder.AddPhi(def_insn_.type_id(), incomings);
+
+      rewritten_.insert(incoming_phi);
+      return incoming_phi;
+    }
+
+    // Builds a phi instruction for the basic block |bb|, all incoming values
+    // will be |value|.
+    inline ir::Instruction* CreatePhiInstruction(ir::BasicBlock* bb,
+                                                 const ir::Instruction& value) {
+      std::vector<uint32_t> incomings;
+      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
+      for (size_t i = 0; i < bb_preds.size(); i++) {
+        incomings.push_back(value.result_id());
+        incomings.push_back(bb_preds[i]);
+      }
+      opt::InstructionBuilder builder(
+          base_->context_, &*bb->begin(),
+          ir::IRContext::kAnalysisInstrToBlockMapping);
+      ir::Instruction* incoming_phi =
+          builder.AddPhi(def_insn_.type_id(), incomings);
+
+      rewritten_.insert(incoming_phi);
+      return incoming_phi;
+    }
+
+    // Return the new def to use for the basic block |bb_id|.
+    // If |bb_id| does not have a suitable def to use then we:
+    //   - return the common def used by all predecessors;
+    //   - if there is no common def, then we build a new phi instr at the
+    //     beginning of |bb_id| and return this new instruction.
+    ir::Instruction* GetOrBuildIncoming(uint32_t bb_id) {
+      assert(base_->cfg_->block(bb_id) != nullptr && "Unknown basic block");
+
+      ir::Instruction*& incoming_phi = bb_to_phi_[bb_id];
+      if (incoming_phi) {
+        return incoming_phi;
+      }
+
+      ir::BasicBlock* bb = &*base_->cfg_->block(bb_id);
+      // If this is an exit basic block, look if there already is an eligible
+      // phi instruction. An eligible phi has |def_insn_| as all incoming
+      // values.
+      if (base_->exit_bb_.count(bb)) {
+        // Look if there is an eligible phi in this block.
+        if (!bb->WhileEachPhiInst([&incoming_phi, this](ir::Instruction* phi) {
+              for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
+                if (phi->GetSingleWordInOperand(i) != def_insn_.result_id())
+                  return true;
+              }
+              incoming_phi = phi;
+              rewritten_.insert(incoming_phi);
+              return false;
+            })) {
+          return incoming_phi;
+        }
+        incoming_phi = CreatePhiInstruction(bb, def_insn_);
+        return incoming_phi;
+      }
+
+      // Get the block that defines the value to use for each predecessor.
+      // If the vector has 1 value, then it means that this block does not need
+      // to build a phi instruction unless |bb_id| is the loop merge block.
+      const std::vector<uint32_t>& defining_blocks =
+          base_->GetDefiningBlocks(bb_id);
+
+      // Special case for structured loops: merge block might be different from
+      // the exit block set. To maintain structured properties it will ease
+      // transformations if the merge block also holds a phi instruction like
+      // the exit ones.
+      if (defining_blocks.size() > 1 || bb_id == base_->merge_block_id_) {
+        if (defining_blocks.size() > 1) {
+          incoming_phi = CreatePhiInstruction(bb, defining_blocks);
+        } else {
+          assert(bb_id == base_->merge_block_id_);
+          incoming_phi =
+              CreatePhiInstruction(bb, *GetOrBuildIncoming(defining_blocks[0]));
+        }
+      } else {
+        incoming_phi = GetOrBuildIncoming(defining_blocks[0]);
+      }
+
+      return incoming_phi;
+    }
+
+    LCSSARewriter* base_;
+    const ir::Instruction& def_insn_;
+    std::unordered_map<uint32_t, ir::Instruction*> bb_to_phi_;
+    std::unordered_set<ir::Instruction*> rewritten_;
+  };
+
+ private:
+  // Return the new def to use for the basic block |bb_id|.
+  // If |bb_id| does not have a suitable def to use then we:
+  //   - return the common def used by all predecessors;
+  //   - if there is no common def, then we build a new phi instr at the
+  //     beginning of |bb_id| and return this new instruction.
+  const std::vector<uint32_t>& GetDefiningBlocks(uint32_t bb_id) {
+    assert(cfg_->block(bb_id) != nullptr && "Unknown basic block");
+    std::vector<uint32_t>& defining_blocks = bb_to_defining_blocks_[bb_id];
+
+    if (defining_blocks.size()) return defining_blocks;
+
+    // Check if one of the loop exit basic block dominates |bb_id|.
+    for (const ir::BasicBlock* e_bb : exit_bb_) {
+      if (dom_tree_.Dominates(e_bb->id(), bb_id)) {
+        defining_blocks.push_back(e_bb->id());
+        return defining_blocks;
+      }
+    }
+
+    // Process parents, they will returns their suitable blocks.
+    // If they are all the same, this means this basic block is dominated by a
+    // common block, so we won't need to build a phi instruction.
+    for (uint32_t pred_id : cfg_->preds(bb_id)) {
+      const std::vector<uint32_t>& pred_blocks = GetDefiningBlocks(pred_id);
+      if (pred_blocks.size() == 1)
+        defining_blocks.push_back(pred_blocks[0]);
+      else
+        defining_blocks.push_back(pred_id);
+    }
+    assert(defining_blocks.size());
+    if (std::all_of(defining_blocks.begin(), defining_blocks.end(),
+                    [&defining_blocks](uint32_t id) {
+                      return id == defining_blocks[0];
+                    })) {
+      // No need for a phi.
+      defining_blocks.resize(1);
+    }
+
+    return defining_blocks;
+  }
+
+  ir::IRContext* context_;
+  ir::CFG* cfg_;
+  const opt::DominatorTree& dom_tree_;
+  const std::unordered_set<ir::BasicBlock*>& exit_bb_;
+  uint32_t merge_block_id_;
+  // This map represent the set of known paths. For each key, the vector
+  // represent the set of blocks holding the definition to be used to build the
+  // phi instruction.
+  // If the vector has 0 value, then the path is unknown yet, and must be built.
+  // If the vector has 1 value, then the value defined by that basic block
+  //   should be used.
+  // If the vector has more than 1 value, then a phi node must be created, the
+  //   basic block ordering is the same as the predecessor ordering.
+  std::unordered_map<uint32_t, std::vector<uint32_t>> bb_to_defining_blocks_;
+};
+
+// Make the set |blocks| closed SSA. The set is closed SSA if all the uses
+// outside the set are phi instructions in exiting basic block set (hold by
+// |lcssa_rewriter|).
+inline void MakeSetClosedSSA(ir::IRContext* context, ir::Function* function,
+                             const std::unordered_set<uint32_t>& blocks,
+                             const std::unordered_set<ir::BasicBlock*>& exit_bb,
+                             LCSSARewriter* lcssa_rewriter) {
+  ir::CFG& cfg = *context->cfg();
+  opt::DominatorTree& dom_tree =
+      context->GetDominatorAnalysis(function, cfg)->GetDomTree();
+  opt::analysis::DefUseManager* def_use_manager = context->get_def_use_mgr();
+
+  for (uint32_t bb_id : blocks) {
+    ir::BasicBlock* bb = cfg.block(bb_id);
+    // If bb does not dominate an exit block, then it cannot have escaping defs.
+    if (!DominatesAnExit(bb, exit_bb, dom_tree)) continue;
+    for (ir::Instruction& inst : *bb) {
+      LCSSARewriter::UseRewriter rewriter(lcssa_rewriter, inst);
+      def_use_manager->ForEachUse(
+          &inst, [&blocks, &rewriter, &exit_bb, context](
+                     ir::Instruction* use, uint32_t operand_index) {
+            ir::BasicBlock* use_parent = context->get_instr_block(use);
+            assert(use_parent);
+            if (blocks.count(use_parent->id())) return;
+
+            if (use->opcode() == SpvOpPhi) {
+              // If the use is a Phi instruction and the incoming block is
+              // coming from the loop, then that's consistent with LCSSA form.
+              if (exit_bb.count(use_parent)) {
+                return;
+              } else {
+                // That's not an exit block, but the user is a phi instruction.
+                // Consider the incoming branch only.
+                use_parent = context->get_instr_block(
+                    use->GetSingleWordOperand(operand_index + 1));
+              }
+            }
+            // Rewrite the use. Note that this call does not invalidate the
+            // def/use manager. So this operation is safe.
+            rewriter.RewriteUse(use_parent, use, operand_index);
+          });
+      rewriter.UpdateManagers();
+    }
+  }
+}
+
+}  // namespace
+
+void LoopUtils::CreateLoopDedicatedExits() {
+  ir::Function* function = loop_->GetHeaderBlock()->GetParent();
+  ir::LoopDescriptor& loop_desc = *context_->GetLoopDescriptor(function);
+  ir::CFG& cfg = *context_->cfg();
+  opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
+
+  const ir::IRContext::Analysis PreservedAnalyses =
+      ir::IRContext::kAnalysisDefUse |
+      ir::IRContext::kAnalysisInstrToBlockMapping;
+
+  // Gathers the set of basic block that are not in this loop and have at least
+  // one predecessor in the loop and one not in the loop.
+  std::unordered_set<uint32_t> exit_bb_set;
+  loop_->GetExitBlocks(context_, &exit_bb_set);
+
+  std::unordered_set<ir::BasicBlock*> new_loop_exits;
+  bool made_change = false;
+  // For each block, we create a new one that gathers all branches from
+  // the loop and fall into the block.
+  for (uint32_t non_dedicate_id : exit_bb_set) {
+    ir::BasicBlock* non_dedicate = cfg.block(non_dedicate_id);
+    const std::vector<uint32_t>& bb_pred = cfg.preds(non_dedicate_id);
+    // Ignore the block if all the predecessors are in the loop.
+    if (std::all_of(bb_pred.begin(), bb_pred.end(),
+                    [this](uint32_t id) { return loop_->IsInsideLoop(id); })) {
+      new_loop_exits.insert(non_dedicate);
+      continue;
+    }
+
+    made_change = true;
+    ir::Function::iterator insert_pt = function->begin();
+    for (; insert_pt != function->end() && &*insert_pt != non_dedicate;
+         ++insert_pt) {
+    }
+    assert(insert_pt != function->end() && "Basic Block not found");
+
+    // Create the dedicate exit basic block.
+    ir::BasicBlock& exit = *insert_pt.InsertBefore(
+        std::unique_ptr<ir::BasicBlock>(new ir::BasicBlock(
+            std::unique_ptr<ir::Instruction>(new ir::Instruction(
+                context_, SpvOpLabel, 0, context_->TakeNextId(), {})))));
+    exit.SetParent(function);
+
+    // Redirect in loop predecessors to |exit| block.
+    for (uint32_t exit_pred_id : bb_pred) {
+      if (loop_->IsInsideLoop(exit_pred_id)) {
+        ir::BasicBlock* pred_block = cfg.block(exit_pred_id);
+        pred_block->ForEachSuccessorLabel([non_dedicate, &exit](uint32_t* id) {
+          if (*id == non_dedicate->id()) *id = exit.id();
+        });
+        // Update the CFG.
+        // |non_dedicate|'s predecessor list will be updated at the end of the
+        // loop.
+        cfg.RegisterBlock(pred_block);
+      }
+    }
+
+    // Register the label to the def/use manager, requires for the phi patching.
+    def_use_mgr->AnalyzeInstDefUse(exit.GetLabelInst());
+    context_->set_instr_block(exit.GetLabelInst(), &exit);
+
+    opt::InstructionBuilder builder(context_, &exit, PreservedAnalyses);
+    // Now jump from our dedicate basic block to the old exit.
+    // We also reset the insert point so all instructions are inserted before
+    // the branch.
+    builder.SetInsertPoint(builder.AddBranch(non_dedicate->id()));
+    non_dedicate->ForEachPhiInst([&builder, &exit, def_use_mgr,
+                                  this](ir::Instruction* phi) {
+      // New phi operands for this instruction.
+      std::vector<uint32_t> new_phi_op;
+      // Phi operands for the dedicated exit block.
+      std::vector<uint32_t> exit_phi_op;
+      for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
+        uint32_t def_id = phi->GetSingleWordInOperand(i);
+        uint32_t incoming_id = phi->GetSingleWordInOperand(i + 1);
+        if (loop_->IsInsideLoop(incoming_id)) {
+          exit_phi_op.push_back(def_id);
+          exit_phi_op.push_back(incoming_id);
+        } else {
+          new_phi_op.push_back(def_id);
+          new_phi_op.push_back(incoming_id);
+        }
+      }
+
+      // Build the new phi instruction dedicated exit block.
+      ir::Instruction* exit_phi = builder.AddPhi(phi->type_id(), exit_phi_op);
+      // Build the new incoming branch.
+      new_phi_op.push_back(exit_phi->result_id());
+      new_phi_op.push_back(exit.id());
+      // Rewrite operands.
+      uint32_t idx = 0;
+      for (; idx < new_phi_op.size(); idx++)
+        phi->SetInOperand(idx, {new_phi_op[idx]});
+      // Remove extra operands, from last to first (more efficient).
+      for (uint32_t j = phi->NumInOperands() - 1; j >= idx; j--)
+        phi->RemoveInOperand(j);
+      // Update the def/use manager for this |phi|.
+      def_use_mgr->AnalyzeInstUse(phi);
+    });
+    // Update the CFG.
+    cfg.RegisterBlock(&exit);
+    cfg.RemoveNonExistingEdges(non_dedicate->id());
+    new_loop_exits.insert(&exit);
+    // If non_dedicate is in a loop, add the new dedicated exit in that loop.
+    if (ir::Loop* parent_loop = loop_desc[non_dedicate])
+      parent_loop->AddBasicBlock(&exit);
+  }
+
+  if (new_loop_exits.size() == 1) {
+    loop_->SetMergeBlock(*new_loop_exits.begin());
+  }
+
+  if (made_change) {
+    context_->InvalidateAnalysesExceptFor(
+        PreservedAnalyses | ir::IRContext::kAnalysisCFG |
+        ir::IRContext::Analysis::kAnalysisLoopAnalysis);
+  }
+}
+
+void LoopUtils::MakeLoopClosedSSA() {
+  CreateLoopDedicatedExits();
+
+  ir::Function* function = loop_->GetHeaderBlock()->GetParent();
+  ir::CFG& cfg = *context_->cfg();
+  opt::DominatorTree& dom_tree =
+      context_->GetDominatorAnalysis(function, cfg)->GetDomTree();
+
+  std::unordered_set<ir::BasicBlock*> exit_bb;
+  {
+    std::unordered_set<uint32_t> exit_bb_id;
+    loop_->GetExitBlocks(context_, &exit_bb_id);
+    for (uint32_t bb_id : exit_bb_id) {
+      exit_bb.insert(cfg.block(bb_id));
+    }
+  }
+
+  LCSSARewriter lcssa_rewriter(context_, dom_tree, exit_bb,
+                               loop_->GetMergeBlock());
+  MakeSetClosedSSA(context_, function, loop_->GetBlocks(), exit_bb,
+                   &lcssa_rewriter);
+
+  // Make sure all defs post-dominated by the merge block have their last use no
+  // further than the merge block.
+  if (loop_->GetMergeBlock()) {
+    std::unordered_set<uint32_t> merging_bb_id;
+    loop_->GetMergingBlocks(context_, &merging_bb_id);
+    merging_bb_id.erase(loop_->GetMergeBlock()->id());
+    // Reset the exit set, now only the merge block is the exit.
+    exit_bb.clear();
+    exit_bb.insert(loop_->GetMergeBlock());
+    // LCSSARewriter is reusable here only because it forces the creation of a
+    // phi instruction in the merge block.
+    MakeSetClosedSSA(context_, function, merging_bb_id, exit_bb,
+                     &lcssa_rewriter);
+  }
+
+  context_->InvalidateAnalysesExceptFor(
+      ir::IRContext::Analysis::kAnalysisDefUse |
+      ir::IRContext::Analysis::kAnalysisCFG |
+      ir::IRContext::Analysis::kAnalysisDominatorAnalysis |
+      ir::IRContext::Analysis::kAnalysisLoopAnalysis);
+}
+
+}  // namespace opt
+}  // namespace spvtools
author	Victor Lomuller <victor@codeplay.com>	2018-01-26 12:07:10 +0000
committer	Steven Perron <stevenperron@google.com>	2018-02-01 15:35:09 -0500
commit	50e85c865ca9c4b53e2724f36a84fb2566c1ce97 (patch)
tree	28907f9d378f13af982e73ecdbe15ad529afa506 /source/opt/loop_utils.cpp
parent	61d8c0384b8400e5a88febb457611d22e36a76c8 (diff)
download	SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.tar.gz SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.tar.bz2 SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.zip