diff options
author | Victor Lomuller <victor@codeplay.com> | 2018-01-26 12:07:10 +0000 |
---|---|---|
committer | Steven Perron <stevenperron@google.com> | 2018-02-01 15:35:09 -0500 |
commit | 50e85c865ca9c4b53e2724f36a84fb2566c1ce97 (patch) | |
tree | 28907f9d378f13af982e73ecdbe15ad529afa506 /source/opt/loop_utils.cpp | |
parent | 61d8c0384b8400e5a88febb457611d22e36a76c8 (diff) | |
download | SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.tar.gz SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.tar.bz2 SPIRV-Tools-50e85c865ca9c4b53e2724f36a84fb2566c1ce97.zip |
Add LoopUtils class to gather some loop transformation support.
This patch adds LoopUtils class to handle some loop related transformations. For now it has 2 transformations that simplifies other transformations such as loop unroll or unswitch:
- Dedicate exit blocks: this ensure that all exit basic block
(out-of-loop basic blocks that have a predecessor in the loop)
have all their predecessors in the loop;
- Loop Closed SSA (LCSSA): this ensure that all definitions in a loop are used inside the loop
or in a phi instruction in an exit basic block.
It also adds the following capabilities:
- Loop::IsLCSSA to test if the loop is in a LCSSA form
- Loop::GetOrCreatePreHeaderBlock that can build a loop preheader if required;
- New methods to allow on the fly updates of the loop descriptors.
- New methods to allow on the fly updates of the CFG analysis.
- Instruction::SetOperand to allow expression of the index relative to Instruction::NumOperands (to be compatible with the index returned by DefUseManager::ForEachUse)
Diffstat (limited to 'source/opt/loop_utils.cpp')
-rw-r--r-- | source/opt/loop_utils.cpp | 485 |
1 files changed, 485 insertions, 0 deletions
diff --git a/source/opt/loop_utils.cpp b/source/opt/loop_utils.cpp new file mode 100644 index 00000000..86bc2eb5 --- /dev/null +++ b/source/opt/loop_utils.cpp @@ -0,0 +1,485 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <algorithm> +#include <memory> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "opt/cfg.h" +#include "opt/ir_builder.h" +#include "opt/ir_context.h" +#include "opt/loop_descriptor.h" +#include "opt/loop_utils.h" + +namespace spvtools { +namespace opt { + +namespace { +// Return true if |bb| is dominated by at least one block in |exits| +static inline bool DominatesAnExit( + ir::BasicBlock* bb, const std::unordered_set<ir::BasicBlock*>& exits, + const opt::DominatorTree& dom_tree) { + for (ir::BasicBlock* e_bb : exits) + if (dom_tree.Dominates(bb, e_bb)) return true; + return false; +} + +// Utility class to rewrite out-of-loop uses of an in-loop definition in terms +// of phi instructions to achieve a LCSSA form. +// For a given definition, the class user registers phi instructions using that +// definition in all loop exit blocks by which the definition escapes. +// Then, when rewriting a use of the definition, the rewriter walks the +// paths from the use the loop exits. At each step, it will insert a phi +// instruction to merge the incoming value according to exit blocks definition. +class LCSSARewriter { + public: + LCSSARewriter(ir::IRContext* context, const opt::DominatorTree& dom_tree, + const std::unordered_set<ir::BasicBlock*>& exit_bb, + ir::BasicBlock* merge_block) + : context_(context), + cfg_(context_->cfg()), + dom_tree_(dom_tree), + exit_bb_(exit_bb), + merge_block_id_(merge_block ? merge_block->id() : 0) {} + + struct UseRewriter { + explicit UseRewriter(LCSSARewriter* base, const ir::Instruction& def_insn) + : base_(base), def_insn_(def_insn) {} + // Rewrites the use of |def_insn_| by the instruction |user| at the index + // |operand_index| in terms of phi instruction. This recursively builds new + // phi instructions from |user| to the loop exit blocks' phis. The use of + // |def_insn_| in |user| is replaced by the relevant phi instruction at the + // end of the operation. + // It is assumed that |user| does not dominates any of the loop exit basic + // block. This operation does not update the def/use manager, instead it + // records what needs to be updated. The actual update is performed by + // UpdateManagers. + void RewriteUse(ir::BasicBlock* bb, ir::Instruction* user, + uint32_t operand_index) { + assert( + (user->opcode() != SpvOpPhi || bb != GetParent(user)) && + "The root basic block must be the incoming edge if |user| is a phi " + "instruction"); + assert((user->opcode() == SpvOpPhi || bb == GetParent(user)) && + "The root basic block must be the instruction parent if |user| is " + "not " + "phi instruction"); + + ir::Instruction* new_def = GetOrBuildIncoming(bb->id()); + + user->SetOperand(operand_index, {new_def->result_id()}); + rewritten_.insert(user); + } + + // In-place update of some managers (avoid full invalidation). + inline void UpdateManagers() { + opt::analysis::DefUseManager* def_use_mgr = + base_->context_->get_def_use_mgr(); + // Register all new definitions. + for (ir::Instruction* insn : rewritten_) { + def_use_mgr->AnalyzeInstDef(insn); + } + // Register all new uses. + for (ir::Instruction* insn : rewritten_) { + def_use_mgr->AnalyzeInstUse(insn); + } + } + + private: + // Return the basic block that |instr| belongs to. + ir::BasicBlock* GetParent(ir::Instruction* instr) { + return base_->context_->get_instr_block(instr); + } + + // Builds a phi instruction for the basic block |bb|. The function assumes + // that |defining_blocks| contains the list of basic block that define the + // usable value for each predecessor of |bb|. + inline ir::Instruction* CreatePhiInstruction( + ir::BasicBlock* bb, const std::vector<uint32_t>& defining_blocks) { + std::vector<uint32_t> incomings; + const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id()); + assert(bb_preds.size() == defining_blocks.size()); + for (size_t i = 0; i < bb_preds.size(); i++) { + incomings.push_back( + GetOrBuildIncoming(defining_blocks[i])->result_id()); + incomings.push_back(bb_preds[i]); + } + opt::InstructionBuilder builder( + base_->context_, &*bb->begin(), + ir::IRContext::kAnalysisInstrToBlockMapping); + ir::Instruction* incoming_phi = + builder.AddPhi(def_insn_.type_id(), incomings); + + rewritten_.insert(incoming_phi); + return incoming_phi; + } + + // Builds a phi instruction for the basic block |bb|, all incoming values + // will be |value|. + inline ir::Instruction* CreatePhiInstruction(ir::BasicBlock* bb, + const ir::Instruction& value) { + std::vector<uint32_t> incomings; + const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id()); + for (size_t i = 0; i < bb_preds.size(); i++) { + incomings.push_back(value.result_id()); + incomings.push_back(bb_preds[i]); + } + opt::InstructionBuilder builder( + base_->context_, &*bb->begin(), + ir::IRContext::kAnalysisInstrToBlockMapping); + ir::Instruction* incoming_phi = + builder.AddPhi(def_insn_.type_id(), incomings); + + rewritten_.insert(incoming_phi); + return incoming_phi; + } + + // Return the new def to use for the basic block |bb_id|. + // If |bb_id| does not have a suitable def to use then we: + // - return the common def used by all predecessors; + // - if there is no common def, then we build a new phi instr at the + // beginning of |bb_id| and return this new instruction. + ir::Instruction* GetOrBuildIncoming(uint32_t bb_id) { + assert(base_->cfg_->block(bb_id) != nullptr && "Unknown basic block"); + + ir::Instruction*& incoming_phi = bb_to_phi_[bb_id]; + if (incoming_phi) { + return incoming_phi; + } + + ir::BasicBlock* bb = &*base_->cfg_->block(bb_id); + // If this is an exit basic block, look if there already is an eligible + // phi instruction. An eligible phi has |def_insn_| as all incoming + // values. + if (base_->exit_bb_.count(bb)) { + // Look if there is an eligible phi in this block. + if (!bb->WhileEachPhiInst([&incoming_phi, this](ir::Instruction* phi) { + for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { + if (phi->GetSingleWordInOperand(i) != def_insn_.result_id()) + return true; + } + incoming_phi = phi; + rewritten_.insert(incoming_phi); + return false; + })) { + return incoming_phi; + } + incoming_phi = CreatePhiInstruction(bb, def_insn_); + return incoming_phi; + } + + // Get the block that defines the value to use for each predecessor. + // If the vector has 1 value, then it means that this block does not need + // to build a phi instruction unless |bb_id| is the loop merge block. + const std::vector<uint32_t>& defining_blocks = + base_->GetDefiningBlocks(bb_id); + + // Special case for structured loops: merge block might be different from + // the exit block set. To maintain structured properties it will ease + // transformations if the merge block also holds a phi instruction like + // the exit ones. + if (defining_blocks.size() > 1 || bb_id == base_->merge_block_id_) { + if (defining_blocks.size() > 1) { + incoming_phi = CreatePhiInstruction(bb, defining_blocks); + } else { + assert(bb_id == base_->merge_block_id_); + incoming_phi = + CreatePhiInstruction(bb, *GetOrBuildIncoming(defining_blocks[0])); + } + } else { + incoming_phi = GetOrBuildIncoming(defining_blocks[0]); + } + + return incoming_phi; + } + + LCSSARewriter* base_; + const ir::Instruction& def_insn_; + std::unordered_map<uint32_t, ir::Instruction*> bb_to_phi_; + std::unordered_set<ir::Instruction*> rewritten_; + }; + + private: + // Return the new def to use for the basic block |bb_id|. + // If |bb_id| does not have a suitable def to use then we: + // - return the common def used by all predecessors; + // - if there is no common def, then we build a new phi instr at the + // beginning of |bb_id| and return this new instruction. + const std::vector<uint32_t>& GetDefiningBlocks(uint32_t bb_id) { + assert(cfg_->block(bb_id) != nullptr && "Unknown basic block"); + std::vector<uint32_t>& defining_blocks = bb_to_defining_blocks_[bb_id]; + + if (defining_blocks.size()) return defining_blocks; + + // Check if one of the loop exit basic block dominates |bb_id|. + for (const ir::BasicBlock* e_bb : exit_bb_) { + if (dom_tree_.Dominates(e_bb->id(), bb_id)) { + defining_blocks.push_back(e_bb->id()); + return defining_blocks; + } + } + + // Process parents, they will returns their suitable blocks. + // If they are all the same, this means this basic block is dominated by a + // common block, so we won't need to build a phi instruction. + for (uint32_t pred_id : cfg_->preds(bb_id)) { + const std::vector<uint32_t>& pred_blocks = GetDefiningBlocks(pred_id); + if (pred_blocks.size() == 1) + defining_blocks.push_back(pred_blocks[0]); + else + defining_blocks.push_back(pred_id); + } + assert(defining_blocks.size()); + if (std::all_of(defining_blocks.begin(), defining_blocks.end(), + [&defining_blocks](uint32_t id) { + return id == defining_blocks[0]; + })) { + // No need for a phi. + defining_blocks.resize(1); + } + + return defining_blocks; + } + + ir::IRContext* context_; + ir::CFG* cfg_; + const opt::DominatorTree& dom_tree_; + const std::unordered_set<ir::BasicBlock*>& exit_bb_; + uint32_t merge_block_id_; + // This map represent the set of known paths. For each key, the vector + // represent the set of blocks holding the definition to be used to build the + // phi instruction. + // If the vector has 0 value, then the path is unknown yet, and must be built. + // If the vector has 1 value, then the value defined by that basic block + // should be used. + // If the vector has more than 1 value, then a phi node must be created, the + // basic block ordering is the same as the predecessor ordering. + std::unordered_map<uint32_t, std::vector<uint32_t>> bb_to_defining_blocks_; +}; + +// Make the set |blocks| closed SSA. The set is closed SSA if all the uses +// outside the set are phi instructions in exiting basic block set (hold by +// |lcssa_rewriter|). +inline void MakeSetClosedSSA(ir::IRContext* context, ir::Function* function, + const std::unordered_set<uint32_t>& blocks, + const std::unordered_set<ir::BasicBlock*>& exit_bb, + LCSSARewriter* lcssa_rewriter) { + ir::CFG& cfg = *context->cfg(); + opt::DominatorTree& dom_tree = + context->GetDominatorAnalysis(function, cfg)->GetDomTree(); + opt::analysis::DefUseManager* def_use_manager = context->get_def_use_mgr(); + + for (uint32_t bb_id : blocks) { + ir::BasicBlock* bb = cfg.block(bb_id); + // If bb does not dominate an exit block, then it cannot have escaping defs. + if (!DominatesAnExit(bb, exit_bb, dom_tree)) continue; + for (ir::Instruction& inst : *bb) { + LCSSARewriter::UseRewriter rewriter(lcssa_rewriter, inst); + def_use_manager->ForEachUse( + &inst, [&blocks, &rewriter, &exit_bb, context]( + ir::Instruction* use, uint32_t operand_index) { + ir::BasicBlock* use_parent = context->get_instr_block(use); + assert(use_parent); + if (blocks.count(use_parent->id())) return; + + if (use->opcode() == SpvOpPhi) { + // If the use is a Phi instruction and the incoming block is + // coming from the loop, then that's consistent with LCSSA form. + if (exit_bb.count(use_parent)) { + return; + } else { + // That's not an exit block, but the user is a phi instruction. + // Consider the incoming branch only. + use_parent = context->get_instr_block( + use->GetSingleWordOperand(operand_index + 1)); + } + } + // Rewrite the use. Note that this call does not invalidate the + // def/use manager. So this operation is safe. + rewriter.RewriteUse(use_parent, use, operand_index); + }); + rewriter.UpdateManagers(); + } + } +} + +} // namespace + +void LoopUtils::CreateLoopDedicatedExits() { + ir::Function* function = loop_->GetHeaderBlock()->GetParent(); + ir::LoopDescriptor& loop_desc = *context_->GetLoopDescriptor(function); + ir::CFG& cfg = *context_->cfg(); + opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); + + const ir::IRContext::Analysis PreservedAnalyses = + ir::IRContext::kAnalysisDefUse | + ir::IRContext::kAnalysisInstrToBlockMapping; + + // Gathers the set of basic block that are not in this loop and have at least + // one predecessor in the loop and one not in the loop. + std::unordered_set<uint32_t> exit_bb_set; + loop_->GetExitBlocks(context_, &exit_bb_set); + + std::unordered_set<ir::BasicBlock*> new_loop_exits; + bool made_change = false; + // For each block, we create a new one that gathers all branches from + // the loop and fall into the block. + for (uint32_t non_dedicate_id : exit_bb_set) { + ir::BasicBlock* non_dedicate = cfg.block(non_dedicate_id); + const std::vector<uint32_t>& bb_pred = cfg.preds(non_dedicate_id); + // Ignore the block if all the predecessors are in the loop. + if (std::all_of(bb_pred.begin(), bb_pred.end(), + [this](uint32_t id) { return loop_->IsInsideLoop(id); })) { + new_loop_exits.insert(non_dedicate); + continue; + } + + made_change = true; + ir::Function::iterator insert_pt = function->begin(); + for (; insert_pt != function->end() && &*insert_pt != non_dedicate; + ++insert_pt) { + } + assert(insert_pt != function->end() && "Basic Block not found"); + + // Create the dedicate exit basic block. + ir::BasicBlock& exit = *insert_pt.InsertBefore( + std::unique_ptr<ir::BasicBlock>(new ir::BasicBlock( + std::unique_ptr<ir::Instruction>(new ir::Instruction( + context_, SpvOpLabel, 0, context_->TakeNextId(), {}))))); + exit.SetParent(function); + + // Redirect in loop predecessors to |exit| block. + for (uint32_t exit_pred_id : bb_pred) { + if (loop_->IsInsideLoop(exit_pred_id)) { + ir::BasicBlock* pred_block = cfg.block(exit_pred_id); + pred_block->ForEachSuccessorLabel([non_dedicate, &exit](uint32_t* id) { + if (*id == non_dedicate->id()) *id = exit.id(); + }); + // Update the CFG. + // |non_dedicate|'s predecessor list will be updated at the end of the + // loop. + cfg.RegisterBlock(pred_block); + } + } + + // Register the label to the def/use manager, requires for the phi patching. + def_use_mgr->AnalyzeInstDefUse(exit.GetLabelInst()); + context_->set_instr_block(exit.GetLabelInst(), &exit); + + opt::InstructionBuilder builder(context_, &exit, PreservedAnalyses); + // Now jump from our dedicate basic block to the old exit. + // We also reset the insert point so all instructions are inserted before + // the branch. + builder.SetInsertPoint(builder.AddBranch(non_dedicate->id())); + non_dedicate->ForEachPhiInst([&builder, &exit, def_use_mgr, + this](ir::Instruction* phi) { + // New phi operands for this instruction. + std::vector<uint32_t> new_phi_op; + // Phi operands for the dedicated exit block. + std::vector<uint32_t> exit_phi_op; + for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { + uint32_t def_id = phi->GetSingleWordInOperand(i); + uint32_t incoming_id = phi->GetSingleWordInOperand(i + 1); + if (loop_->IsInsideLoop(incoming_id)) { + exit_phi_op.push_back(def_id); + exit_phi_op.push_back(incoming_id); + } else { + new_phi_op.push_back(def_id); + new_phi_op.push_back(incoming_id); + } + } + + // Build the new phi instruction dedicated exit block. + ir::Instruction* exit_phi = builder.AddPhi(phi->type_id(), exit_phi_op); + // Build the new incoming branch. + new_phi_op.push_back(exit_phi->result_id()); + new_phi_op.push_back(exit.id()); + // Rewrite operands. + uint32_t idx = 0; + for (; idx < new_phi_op.size(); idx++) + phi->SetInOperand(idx, {new_phi_op[idx]}); + // Remove extra operands, from last to first (more efficient). + for (uint32_t j = phi->NumInOperands() - 1; j >= idx; j--) + phi->RemoveInOperand(j); + // Update the def/use manager for this |phi|. + def_use_mgr->AnalyzeInstUse(phi); + }); + // Update the CFG. + cfg.RegisterBlock(&exit); + cfg.RemoveNonExistingEdges(non_dedicate->id()); + new_loop_exits.insert(&exit); + // If non_dedicate is in a loop, add the new dedicated exit in that loop. + if (ir::Loop* parent_loop = loop_desc[non_dedicate]) + parent_loop->AddBasicBlock(&exit); + } + + if (new_loop_exits.size() == 1) { + loop_->SetMergeBlock(*new_loop_exits.begin()); + } + + if (made_change) { + context_->InvalidateAnalysesExceptFor( + PreservedAnalyses | ir::IRContext::kAnalysisCFG | + ir::IRContext::Analysis::kAnalysisLoopAnalysis); + } +} + +void LoopUtils::MakeLoopClosedSSA() { + CreateLoopDedicatedExits(); + + ir::Function* function = loop_->GetHeaderBlock()->GetParent(); + ir::CFG& cfg = *context_->cfg(); + opt::DominatorTree& dom_tree = + context_->GetDominatorAnalysis(function, cfg)->GetDomTree(); + + std::unordered_set<ir::BasicBlock*> exit_bb; + { + std::unordered_set<uint32_t> exit_bb_id; + loop_->GetExitBlocks(context_, &exit_bb_id); + for (uint32_t bb_id : exit_bb_id) { + exit_bb.insert(cfg.block(bb_id)); + } + } + + LCSSARewriter lcssa_rewriter(context_, dom_tree, exit_bb, + loop_->GetMergeBlock()); + MakeSetClosedSSA(context_, function, loop_->GetBlocks(), exit_bb, + &lcssa_rewriter); + + // Make sure all defs post-dominated by the merge block have their last use no + // further than the merge block. + if (loop_->GetMergeBlock()) { + std::unordered_set<uint32_t> merging_bb_id; + loop_->GetMergingBlocks(context_, &merging_bb_id); + merging_bb_id.erase(loop_->GetMergeBlock()->id()); + // Reset the exit set, now only the merge block is the exit. + exit_bb.clear(); + exit_bb.insert(loop_->GetMergeBlock()); + // LCSSARewriter is reusable here only because it forces the creation of a + // phi instruction in the merge block. + MakeSetClosedSSA(context_, function, merging_bb_id, exit_bb, + &lcssa_rewriter); + } + + context_->InvalidateAnalysesExceptFor( + ir::IRContext::Analysis::kAnalysisDefUse | + ir::IRContext::Analysis::kAnalysisCFG | + ir::IRContext::Analysis::kAnalysisDominatorAnalysis | + ir::IRContext::Analysis::kAnalysisLoopAnalysis); +} + +} // namespace opt +} // namespace spvtools |