diff options
Diffstat (limited to 'source/opt')
-rw-r--r-- | source/opt/ir_builder.h | 17 | ||||
-rw-r--r-- | source/opt/loop_descriptor.cpp | 177 | ||||
-rw-r--r-- | source/opt/loop_descriptor.h | 34 | ||||
-rw-r--r-- | source/opt/loop_unroller.cpp | 388 | ||||
-rw-r--r-- | source/opt/loop_unroller.h | 6 | ||||
-rw-r--r-- | source/opt/loop_utils.h | 2 | ||||
-rw-r--r-- | source/opt/optimizer.cpp | 4 |
7 files changed, 460 insertions, 168 deletions
diff --git a/source/opt/ir_builder.h b/source/opt/ir_builder.h index 3b75ec6d..a1a1d1e0 100644 --- a/source/opt/ir_builder.h +++ b/source/opt/ir_builder.h @@ -172,11 +172,20 @@ class InstructionBuilder { // Assert that we are not trying to store a negative number in an unsigned // type. if (!sign) - assert(value > 0 && + assert(value >= 0 && "Trying to add a signed integer with an unsigned type!"); - // Get or create the integer type. - analysis::Integer int_type(32, sign); + analysis::Integer int_type{32, sign}; + + // Get or create the integer type. This rebuilds the type and manages the + // memory for the rebuilt type. + uint32_t type_id = + GetContext()->get_type_mgr()->GetTypeInstruction(&int_type); + + // Get the memory managed type so that it is safe to be stored by + // GetConstant. + analysis::Type* rebuilt_type = + GetContext()->get_type_mgr()->GetType(type_id); // Even if the value is negative we need to pass the bit pattern as a // uint32_t to GetConstant. @@ -184,7 +193,7 @@ class InstructionBuilder { // Create the constant value. const opt::analysis::Constant* constant = - GetContext()->get_constant_mgr()->GetConstant(&int_type, {word}); + GetContext()->get_constant_mgr()->GetConstant(rebuilt_type, {word}); // Create the OpConstant instruction using the type and the value. return GetContext()->get_constant_mgr()->GetDefiningInstruction(constant); diff --git a/source/opt/loop_descriptor.cpp b/source/opt/loop_descriptor.cpp index 32765be0..131363c5 100644 --- a/source/opt/loop_descriptor.cpp +++ b/source/opt/loop_descriptor.cpp @@ -33,7 +33,7 @@ namespace ir { // Takes in a phi instruction |induction| and the loop |header| and returns the // step operation of the loop. ir::Instruction* Loop::GetInductionStepOperation( - const ir::Loop* loop, const ir::Instruction* induction) const { + const ir::Instruction* induction) const { // Induction must be a phi instruction. assert(induction->opcode() == SpvOpPhi); @@ -50,7 +50,7 @@ ir::Instruction* Loop::GetInductionStepOperation( // Check if the block is dominated by header, and thus coming from within // the loop. - if (loop->IsInsideLoop(incoming_block)) { + if (IsInsideLoop(incoming_block)) { step = def_use_manager->GetDef( induction->GetSingleWordInOperand(operand_id - 1)); break; @@ -61,6 +61,21 @@ ir::Instruction* Loop::GetInductionStepOperation( return nullptr; } + // The induction variable which binds the loop must only be modified once. + uint32_t lhs = step->GetSingleWordInOperand(0); + uint32_t rhs = step->GetSingleWordInOperand(1); + + // One of the left hand side or right hand side of the step instruction must + // be the induction phi and the other must be an OpConstant. + if (lhs != induction->result_id() && rhs != induction->result_id()) { + return nullptr; + } + + if (def_use_manager->GetDef(lhs)->opcode() != SpvOp::SpvOpConstant && + def_use_manager->GetDef(rhs)->opcode() != SpvOp::SpvOpConstant) { + return nullptr; + } + return step; } @@ -84,17 +99,52 @@ bool Loop::IsSupportedCondition(SpvOp condition) const { // > case SpvOp::SpvOpUGreaterThan: case SpvOp::SpvOpSGreaterThan: + + // >= + case SpvOp::SpvOpSGreaterThanEqual: + case SpvOp::SpvOpUGreaterThanEqual: + // <= + case SpvOp::SpvOpSLessThanEqual: + case SpvOp::SpvOpULessThanEqual: + return true; default: return false; } } +int64_t Loop::GetResidualConditionValue(SpvOp condition, int64_t initial_value, + int64_t step_value, + size_t number_of_iterations, + size_t factor) { + int64_t remainder = + initial_value + (number_of_iterations % factor) * step_value; + + // We subtract or add one as the above formula calculates the remainder if the + // loop where just less than or greater than. Adding or subtracting one should + // give a functionally equivalent value. + switch (condition) { + case SpvOp::SpvOpSGreaterThanEqual: + case SpvOp::SpvOpUGreaterThanEqual: { + remainder -= 1; + break; + } + case SpvOp::SpvOpSLessThanEqual: + case SpvOp::SpvOpULessThanEqual: { + remainder += 1; + break; + } + + default: + break; + } + return remainder; +} + // Extract the initial value from the |induction| OpPhi instruction and store it // in |value|. If the function couldn't find the initial value of |induction| // return false. -bool Loop::GetInductionInitValue(const ir::Loop* loop, - const ir::Instruction* induction, +bool Loop::GetInductionInitValue(const ir::Instruction* induction, int64_t* value) const { ir::Instruction* constant_instruction = nullptr; opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr(); @@ -104,7 +154,7 @@ bool Loop::GetInductionInitValue(const ir::Loop* loop, ir::BasicBlock* bb = context_->cfg()->block( induction->GetSingleWordInOperand(operand_id + 1)); - if (!loop->IsInsideLoop(bb)) { + if (!IsInsideLoop(bb)) { constant_instruction = def_use_manager->GetDef( induction->GetSingleWordInOperand(operand_id)); } @@ -413,6 +463,25 @@ bool Loop::AreAllOperandsOutsideLoop(IRContext* context, Instruction* inst) { return all_outside_loop; } +void Loop::ComputeLoopStructuredOrder( + std::vector<ir::BasicBlock*>* ordered_loop_blocks, bool include_pre_header, + bool include_merge) const { + ir::CFG& cfg = *context_->cfg(); + + // Reserve the memory: all blocks in the loop + extra if needed. + ordered_loop_blocks->reserve(GetBlocks().size() + include_pre_header + + include_merge); + + if (include_pre_header && GetPreHeaderBlock()) + ordered_loop_blocks->push_back(loop_preheader_); + cfg.ForEachBlockInReversePostOrder( + loop_header_, [ordered_loop_blocks, this](BasicBlock* bb) { + if (IsInsideLoop(bb)) ordered_loop_blocks->push_back(bb); + }); + if (include_merge && GetMergeBlock()) + ordered_loop_blocks->push_back(loop_merge_); +} + LoopDescriptor::LoopDescriptor(const Function* f) : loops_() { PopulateList(f); } @@ -550,7 +619,7 @@ bool Loop::FindNumberOfIterations(const ir::Instruction* induction, } // Find the instruction which is stepping through the loop. - ir::Instruction* step_inst = GetInductionStepOperation(this, induction); + ir::Instruction* step_inst = GetInductionStepOperation(induction); if (!step_inst) return false; // Find the constant value used by the condition variable. @@ -577,17 +646,18 @@ bool Loop::FindNumberOfIterations(const ir::Instruction* induction, // Find the inital value of the loop and make sure it is a constant integer. int64_t init_value = 0; - if (!GetInductionInitValue(this, induction, &init_value)) return false; + if (!GetInductionInitValue(induction, &init_value)) return false; // If iterations is non null then store the value in that. - if (iterations_out) { - int64_t num_itrs = GetIterations(condition->opcode(), condition_value, - init_value, step_value); + int64_t num_itrs = GetIterations(condition->opcode(), condition_value, + init_value, step_value); - // If the loop body will not be reached return false. - if (num_itrs <= 0) { - return false; - } + // If the loop body will not be reached return false. + if (num_itrs <= 0) { + return false; + } + + if (iterations_out) { assert(static_cast<size_t>(num_itrs) <= std::numeric_limits<size_t>::max()); *iterations_out = static_cast<size_t>(num_itrs); } @@ -611,26 +681,87 @@ int64_t Loop::GetIterations(SpvOp condition, int64_t condition_value, int64_t init_value, int64_t step_value) const { int64_t diff = 0; - // Take the abs of - step values. - step_value = llabs(step_value); - switch (condition) { case SpvOp::SpvOpSLessThan: case SpvOp::SpvOpULessThan: { + // If the condition is not met to begin with the loop will never iterate. + if (!(init_value < condition_value)) return 0; + diff = condition_value - init_value; + + // If the operation is a less then operation then the diff and step must + // have the same sign otherwise the induction will never cross the + // condition (either never true or always true). + if ((diff < 0 && step_value > 0) || (diff > 0 && step_value < 0)) { + return 0; + } + break; } case SpvOp::SpvOpSGreaterThan: case SpvOp::SpvOpUGreaterThan: { + // If the condition is not met to begin with the loop will never iterate. + if (!(init_value > condition_value)) return 0; + diff = init_value - condition_value; + + // If the operation is a greater than operation then the diff and step + // must have opposite signs. Otherwise the condition will always be true + // or will never be true. + if ((diff < 0 && step_value < 0) || (diff > 0 && step_value > 0)) { + return 0; + } + + break; + } + + case SpvOp::SpvOpSGreaterThanEqual: + case SpvOp::SpvOpUGreaterThanEqual: { + // If the condition is not met to begin with the loop will never iterate. + if (!(init_value >= condition_value)) return 0; + + // We subract one to make it the same as SpvOpGreaterThan as it is + // functionally equivalent. + diff = init_value - (condition_value - 1); + + // If the operation is a greater than operation then the diff and step + // must have opposite signs. Otherwise the condition will always be true + // or will never be true. + if ((diff > 0 && step_value > 0) || (diff < 0 && step_value < 0)) { + return 0; + } + + break; + } + + case SpvOp::SpvOpSLessThanEqual: + case SpvOp::SpvOpULessThanEqual: { + // If the condition is not met to begin with the loop will never iterate. + if (!(init_value <= condition_value)) return 0; + + // We add one to make it the same as SpvOpLessThan as it is functionally + // equivalent. + diff = (condition_value + 1) - init_value; + + // If the operation is a less than operation then the diff and step must + // have the same sign otherwise the induction will never cross the + // condition (either never true or always true). + if ((diff < 0 && step_value > 0) || (diff > 0 && step_value < 0)) { + return 0; + } + break; } + default: assert(false && "Could not retrieve number of iterations from the loop condition. " "Condition is not supported."); } + // Take the abs of - step values. + step_value = llabs(step_value); + diff = llabs(diff); int64_t result = diff / step_value; if (diff % step_value != 0) { @@ -639,7 +770,17 @@ int64_t Loop::GetIterations(SpvOp condition, int64_t condition_value, return result; } -ir::Instruction* Loop::FindInductionVariable( +// Returns the list of induction variables within the loop. +void Loop::GetInductionVariables( + std::vector<ir::Instruction*>& induction_variables) const { + for (ir::Instruction& inst : *loop_header_) { + if (inst.opcode() == SpvOp::SpvOpPhi) { + induction_variables.push_back(&inst); + } + } +} + +ir::Instruction* Loop::FindConditionVariable( const ir::BasicBlock* condition_block) const { // Find the branch instruction. const ir::Instruction& branch_inst = *condition_block->ctail(); diff --git a/source/opt/loop_descriptor.h b/source/opt/loop_descriptor.h index ef5f19f1..d0421d64 100644 --- a/source/opt/loop_descriptor.h +++ b/source/opt/loop_descriptor.h @@ -207,10 +207,13 @@ class Loop { AddBasicBlock(bb); } - // This function uses the |condition| to find the induction variable within - // the loop. This only works if the loop is bound by a single condition and a - // single induction variable. - ir::Instruction* FindInductionVariable(const ir::BasicBlock* condition) const; + // Returns the list of induction variables within the loop. + void GetInductionVariables(std::vector<ir::Instruction*>& inductions) const; + + // This function uses the |condition| to find the induction variable which is + // used by the loop condition within the loop. This only works if the loop is + // bound by a single condition and single induction variable. + ir::Instruction* FindConditionVariable(const ir::BasicBlock* condition) const; // Returns the number of iterations within a loop when given the |induction| // variable and the loop |condition| check. It stores the found number of @@ -275,14 +278,13 @@ class Loop { // Extract the initial value from the |induction| variable and store it in // |value|. If the function couldn't find the initial value of |induction| // return false. - bool GetInductionInitValue(const ir::Loop* loop, - const ir::Instruction* induction, + bool GetInductionInitValue(const ir::Instruction* induction, int64_t* value) const; // Takes in a phi instruction |induction| and the loop |header| and returns // the step operation of the loop. ir::Instruction* GetInductionStepOperation( - const ir::Loop* loop, const ir::Instruction* induction) const; + const ir::Instruction* induction) const; // Returns true if we can deduce the number of loop iterations in the step // operation |step|. IsSupportedCondition must also be true for the condition @@ -294,6 +296,24 @@ class Loop { // the step instruction. bool IsSupportedCondition(SpvOp condition) const; + // Creates the list of the loop's basic block in structured order and store + // the result in |ordered_loop_blocks|. If |include_pre_header| is true, the + // pre-header block will also be included at the beginning of the list if it + // exist. If |include_merge| is true, the merge block will also be included at + // the end of the list if it exist. + void ComputeLoopStructuredOrder( + std::vector<ir::BasicBlock*>* ordered_loop_blocks, + bool include_pre_header = false, bool include_merge = false) const; + + // Given the loop |condition|, |initial_value|, |step_value|, the trip count + // |number_of_iterations|, and the |unroll_factor| requested, get the new + // condition value for the residual loop. + static int64_t GetResidualConditionValue(SpvOp condition, + int64_t initial_value, + int64_t step_value, + size_t number_of_iterations, + size_t unroll_factor); + private: IRContext* context_; // The block which marks the start of the loop. diff --git a/source/opt/loop_unroller.cpp b/source/opt/loop_unroller.cpp index 16031e29..39aa1081 100644 --- a/source/opt/loop_unroller.cpp +++ b/source/opt/loop_unroller.cpp @@ -62,6 +62,12 @@ namespace spvtools { namespace opt { namespace { +// Loop control constant value for DontUnroll flag. +static const uint32_t kLoopControlDontUnrollIndex = 2; + +// Operand index of the loop control parameter of the OpLoopMerge. +static const uint32_t kLoopControlIndex = 2; + // This utility class encapsulates some of the state we need to maintain between // loop unrolls. Specifically it maintains key blocks and the induction variable // in the current loop duplication step and the blocks from the previous one. @@ -79,20 +85,24 @@ struct LoopUnrollState { // Initialize from the loop descriptor class. LoopUnrollState(ir::Instruction* induction, ir::BasicBlock* continue_block, - ir::BasicBlock* condition) + ir::BasicBlock* condition, + std::vector<ir::Instruction*>&& phis) : previous_phi_(induction), previous_continue_block_(continue_block), previous_condition_block_(condition), new_phi(nullptr), new_continue_block(nullptr), new_condition_block(nullptr), - new_header_block(nullptr) {} + new_header_block(nullptr) { + previous_phis_ = std::move(phis); + } // Swap the state so that the new nodes are now the previous nodes. void NextIterationState() { previous_phi_ = new_phi; previous_continue_block_ = new_continue_block; previous_condition_block_ = new_condition_block; + previous_phis_ = std::move(new_phis_); // Clear new nodes. new_phi = nullptr; @@ -103,11 +113,16 @@ struct LoopUnrollState { // Clear new block/instruction maps. new_blocks.clear(); new_inst.clear(); + ids_to_new_inst.clear(); } // The induction variable from the immediately preceding loop body. ir::Instruction* previous_phi_; + // All the phi nodes from the previous loop iteration. + std::vector<ir::Instruction*> previous_phis_; + + std::vector<ir::Instruction*> new_phis_; // The previous continue block. The backedge will be removed from this and // added to the new continue block. ir::BasicBlock* previous_continue_block_; @@ -131,9 +146,11 @@ struct LoopUnrollState { // from. std::unordered_map<uint32_t, ir::BasicBlock*> new_blocks; - // A mapping of new instruction ids to the instruction ids from which they - // were copied. + // A mapping of the original instruction ids to the instruction ids to their + // copies. std::unordered_map<uint32_t, uint32_t> new_inst; + + std::unordered_map<uint32_t, ir::Instruction*> ids_to_new_inst; }; // This class implements the actual unrolling. It uses a LoopUnrollState to @@ -195,6 +212,22 @@ class LoopUnrollerUtilsImpl { // Extracts the initial state information from the |loop|. void Init(ir::Loop* loop); + // Replace the uses of each induction variable outside the loop with the final + // value of the induction variable before the loop exit. To reflect the proper + // state of a fully unrolled loop. + void ReplaceInductionUseWithFinalValue(ir::Loop* loop); + + // Remove all the instructions in the invalidated_instructions_ vector. + void RemoveDeadInstructions(); + + // Replace any use of induction variables outwith the loop with the final + // value of the induction variable in the unrolled loop. + void ReplaceOutsideLoopUseWithFinalValue(ir::Loop* loop); + + // Set the LoopControl operand of the OpLoopMerge instruction to be + // DontUnroll. + void MarkLoopControlAsDontUnroll(ir::Loop* loop) const; + private: // Remap all the in |basic_block| to new IDs and keep the mapping of new ids // to old @@ -234,6 +267,12 @@ class LoopUnrollerUtilsImpl { // the parent exists. void AddBlocksToLoop(ir::Loop* loop) const; + // After the partially unroll step the phi instructions in the header block + // will be in an illegal format. This function makes the phis legal by making + // the edge from the latch block come from the new latch block and the value + // to be the actual value of the phi at that point. + void LinkLastPhisToStart(ir::Loop* loop) const; + // A pointer to the IRContext. Used to add/remove instructions and for usedef // chains. ir::IRContext* context_; @@ -246,7 +285,7 @@ class LoopUnrollerUtilsImpl { BasicBlockListTy blocks_to_add_; // List of instructions which are now dead and can be removed. - std::vector<ir::Instruction*> dead_instructions_; + std::vector<ir::Instruction*> invalidated_instructions_; // Maintains the current state of the transform between calls to unroll. LoopUnrollState state_; @@ -261,6 +300,10 @@ class LoopUnrollerUtilsImpl { // The induction variable of the loop. ir::Instruction* loop_induction_variable_; + // Phis used in the loop need to be remapped to use the actual result values + // and then be remapped at the end. + std::vector<ir::Instruction*> loop_phi_instructions_; + // The number of loop iterations that the loop would preform pre-unroll. size_t number_of_loop_iterations_; @@ -300,7 +343,7 @@ void LoopUnrollerUtilsImpl::Init(ir::Loop* loop) { } assert(loop_condition_block_); - loop_induction_variable_ = loop->FindInductionVariable(loop_condition_block_); + loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_); assert(loop_induction_variable_); bool found = loop->FindNumberOfIterations( @@ -308,6 +351,9 @@ void LoopUnrollerUtilsImpl::Init(ir::Loop* loop) { &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_); (void)found; // To silence unused variable warning on release builds. assert(found); + + // Blocks are stored in an unordered set of ids in the loop class, we need to + // create the dominator ordered list. ComputeLoopOrderedBlocks(loop); } @@ -318,7 +364,6 @@ void LoopUnrollerUtilsImpl::Init(ir::Loop* loop) { // number of bodies. void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(ir::Loop* loop, size_t factor) { - // Create a new merge block for the first loop. std::unique_ptr<ir::Instruction> new_label{new ir::Instruction( context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})}; std::unique_ptr<ir::BasicBlock> new_exit_bb{ @@ -332,7 +377,6 @@ void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(ir::Loop* loop, blocks_to_add_.push_back(std::move(new_exit_bb)); ir::BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get(); ir::Instruction& original_conditional_branch = *loop_condition_block_->tail(); - // Duplicate the loop, providing access to the blocks of both loops. // This is a naked new due to the VS2013 requirement of not having unique // pointers in vectors, as it will be inserted into a vector with @@ -348,20 +392,45 @@ void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(ir::Loop* loop, AddBlocksToFunction(loop->GetMergeBlock()); blocks_to_add_.clear(); + // Create a new merge block for the first loop. InstructionBuilder builder{context_, new_exit_bb_raw}; // Make the first loop branch to the second. builder.AddBranch(new_loop->GetHeaderBlock()->id()); loop_condition_block_ = state_.new_condition_block; loop_induction_variable_ = state_.new_phi; - // Unroll the new loop by the factor with the usual -1 to account for the // existing block iteration. Unroll(new_loop, factor); + LinkLastPhisToStart(new_loop); + AddBlocksToLoop(new_loop); + + // Add the new merge block to the back of the list of blocks to be added. It + // needs to be the last block added to maintain dominator order in the binary. + blocks_to_add_.push_back( + std::unique_ptr<ir::BasicBlock>(new_loop->GetMergeBlock())); + + // Add the blocks to the function. + AddBlocksToFunction(loop->GetMergeBlock()); + + // Reset the usedef analysis. + context_->InvalidateAnalysesExceptFor( + ir::IRContext::Analysis::kAnalysisLoopAnalysis); + opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr(); + + // The loop condition. + ir::Instruction* condition_check = def_use_manager->GetDef( + original_conditional_branch.GetSingleWordOperand(0)); + + // This should have been checked by the LoopUtils::CanPerformUnroll function + // before entering this. + assert(loop->IsSupportedCondition(condition_check->opcode())); + // We need to account for the initial body when calculating the remainder. - int64_t remainder = loop_init_value_ + - (number_of_loop_iterations_ % factor) * loop_step_value_; + int64_t remainder = ir::Loop::GetResidualConditionValue( + condition_check->opcode(), loop_init_value_, loop_step_value_, + number_of_loop_iterations_, factor); assert(remainder > std::numeric_limits<int32_t>::min() && remainder < std::numeric_limits<int32_t>::max()); @@ -380,36 +449,45 @@ void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(ir::Loop* loop, uint32_t constant_id = new_constant->result_id(); - // Add the merge block to the back of the binary. - blocks_to_add_.push_back( - std::unique_ptr<ir::BasicBlock>(new_loop->GetMergeBlock())); - - AddBlocksToLoop(new_loop); - // Add the blocks to the function. - AddBlocksToFunction(loop->GetMergeBlock()); - - // Reset the usedef analysis. - context_->InvalidateAnalysesExceptFor( - ir::IRContext::Analysis::kAnalysisLoopAnalysis); - opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr(); - // Update the condition check. - ir::Instruction* condition_check = def_use_manager->GetDef( - original_conditional_branch.GetSingleWordOperand(0)); - - // This should have been checked by the LoopUtils::CanPerformUnroll function - // before entering this. - assert(condition_check->opcode() == SpvOpSLessThan); condition_check->SetInOperand(1, {constant_id}); // Update the next phi node. The phi will have a constant value coming in from // the preheader block. For the duplicated loop we need to update the constant // to be the amount of iterations covered by the first loop and the incoming // block to be the first loops new merge block. - uint32_t phi_incoming_index = - GetPhiIndexFromLabel(loop->GetPreHeaderBlock(), loop_induction_variable_); - loop_induction_variable_->SetInOperand(phi_incoming_index - 1, {constant_id}); - loop_induction_variable_->SetInOperand(phi_incoming_index, {new_merge_id}); + std::vector<ir::Instruction*> new_inductions; + new_loop->GetInductionVariables(new_inductions); + + std::vector<ir::Instruction*> old_inductions; + loop->GetInductionVariables(old_inductions); + for (size_t index = 0; index < new_inductions.size(); ++index) { + ir::Instruction* new_induction = new_inductions[index]; + ir::Instruction* old_induction = old_inductions[index]; + // Get the index of the loop initalizer, the value coming in from the + // preheader. + uint32_t initalizer_index = + GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction); + + // Replace the second loop initalizer with the phi from the first + new_induction->SetInOperand(initalizer_index - 1, + {old_induction->result_id()}); + new_induction->SetInOperand(initalizer_index, {new_merge_id}); + + // If the use of the first loop induction variable is outside of the loop + // then replace that use with the second loop induction variable. + uint32_t second_loop_induction = new_induction->result_id(); + auto replace_use_outside_of_loop = [loop, second_loop_induction]( + ir::Instruction* user, + uint32_t operand_index) { + if (!loop->IsInsideLoop(user)) { + user->SetOperand(operand_index, {second_loop_induction}); + } + }; + + context_->get_def_use_mgr()->ForEachUse(old_induction, + replace_use_outside_of_loop); + } context_->InvalidateAnalysesExceptFor( ir::IRContext::Analysis::kAnalysisLoopAnalysis); @@ -420,36 +498,58 @@ void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(ir::Loop* loop, *context_->GetLoopDescriptor(&function_); loop_descriptor.AddLoop(new_loop, loop->GetParent()); + + RemoveDeadInstructions(); } -// Duplicate the |loop| body |factor| number of times while keeping the loop -// backedge intact. -void LoopUnrollerUtilsImpl::PartiallyUnroll(ir::Loop* loop, size_t factor) { - Unroll(loop, factor); - AddBlocksToLoop(loop); - AddBlocksToFunction(loop->GetMergeBlock()); +// Mark this loop as DontUnroll as it will already be unrolled and it may not +// be safe to unroll a previously partially unrolled loop. +void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(ir::Loop* loop) const { + ir::Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst(); + assert(loop_merge_inst && + "Loop merge instruction could not be found after entering unroller " + "(should have exited before this)"); + loop_merge_inst->SetInOperand(kLoopControlIndex, + {kLoopControlDontUnrollIndex}); } -// Duplicate the |loop| body |factor| number of times while keeping the loop -// backedge intact. +// Duplicate the |loop| body |factor| - 1 number of times while keeping the loop +// backedge intact. This will leave the loop with |factor| number of bodies +// after accounting for the initial body. void LoopUnrollerUtilsImpl::Unroll(ir::Loop* loop, size_t factor) { + // If we unroll a loop partially it will not be safe to unroll it further. + // This is due to the current method of calculating the number of loop + // iterations. + MarkLoopControlAsDontUnroll(loop); + + std::vector<ir::Instruction*> inductions; + loop->GetInductionVariables(inductions); state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(), - loop_condition_block_}; + loop_condition_block_, std::move(inductions)}; for (size_t i = 0; i < factor - 1; ++i) { CopyBody(loop, true); } +} - uint32_t phi_index = GetPhiIndexFromLabel(state_.previous_continue_block_, - state_.previous_phi_); - uint32_t phi_variable = - state_.previous_phi_->GetSingleWordInOperand(phi_index - 1); - uint32_t phi_label = state_.previous_phi_->GetSingleWordInOperand(phi_index); - - ir::Instruction* original_phi = loop_induction_variable_; +void LoopUnrollerUtilsImpl::RemoveDeadInstructions() { + // Remove the dead instructions. + for (ir::Instruction* inst : invalidated_instructions_) { + context_->KillInst(inst); + } +} - // SetInOperands are offset by two. - original_phi->SetInOperand(phi_index - 1, {phi_variable}); - original_phi->SetInOperand(phi_index, {phi_label}); +void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(ir::Loop* loop) { + context_->InvalidateAnalysesExceptFor( + ir::IRContext::Analysis::kAnalysisLoopAnalysis); + std::vector<ir::Instruction*> inductions; + loop->GetInductionVariables(inductions); + + for (size_t index = 0; index < inductions.size(); ++index) { + uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index], + state_.previous_continue_block_->id()); + context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id); + invalidated_instructions_.push_back(inductions[index]); + } } // Fully unroll the loop by partially unrolling it by the number of loop @@ -476,6 +576,9 @@ void LoopUnrollerUtilsImpl::FullyUnroll(ir::Loop* loop) { // Add the blocks to the function. AddBlocksToFunction(loop->GetMergeBlock()); + ReplaceInductionUseWithFinalValue(loop); + + RemoveDeadInstructions(); // Invalidate all analyses. context_->InvalidateAnalysesExceptFor( ir::IRContext::Analysis::kAnalysisLoopAnalysis); @@ -490,7 +593,6 @@ void LoopUnrollerUtilsImpl::CopyBasicBlock(ir::Loop* loop, bool preserve_instructions) { // Clone the block exactly, including the IDs. ir::BasicBlock* basic_block = itr->Clone(context_); - basic_block->SetParent(itr->GetParent()); // Assign each result a new unique ID and keep a mapping of the old ids to @@ -515,7 +617,7 @@ void LoopUnrollerUtilsImpl::CopyBasicBlock(ir::Loop* loop, if (!preserve_instructions) { // Remove the loop merge instruction if it exists. ir::Instruction* merge_inst = basic_block->GetLoopMergeInst(); - if (merge_inst) context_->KillInst(merge_inst); + if (merge_inst) invalidated_instructions_.push_back(merge_inst); } } @@ -551,10 +653,26 @@ void LoopUnrollerUtilsImpl::CopyBody(ir::Loop* loop, ir::Instruction& new_continue_branch = *state_.new_continue_block->tail(); new_continue_branch.SetInOperand(0, {loop->GetHeaderBlock()->id()}); - // Update references to the old phi node with the actual variable. - const ir::Instruction* induction = loop_induction_variable_; - state_.new_inst[induction->result_id()] = - GetPhiDefID(state_.previous_phi_, state_.previous_continue_block_->id()); + std::vector<ir::Instruction*> inductions; + loop->GetInductionVariables(inductions); + for (size_t index = 0; index < inductions.size(); ++index) { + ir::Instruction* master_copy = inductions[index]; + + assert(master_copy->result_id() != 0); + ir::Instruction* induction_clone = + state_.ids_to_new_inst[state_.new_inst[master_copy->result_id()]]; + + state_.new_phis_.push_back(induction_clone); + assert(induction_clone->result_id() != 0); + + if (!state_.previous_phis_.empty()) { + state_.new_inst[master_copy->result_id()] = GetPhiDefID( + state_.previous_phis_[index], state_.previous_continue_block_->id()); + } else { + // Do not replace the first phi block ids. + state_.new_inst[master_copy->result_id()] = master_copy->result_id(); + } + } if (eliminate_conditions && state_.new_condition_block != loop_condition_block_) { @@ -569,7 +687,8 @@ void LoopUnrollerUtilsImpl::CopyBody(ir::Loop* loop, RemapOperands(pair.second); } - dead_instructions_.push_back(state_.new_phi); + for (ir::Instruction* dead_phi : state_.new_phis_) + invalidated_instructions_.push_back(dead_phi); // Swap the state so the new is now the previous. state_.NextIterationState(); @@ -582,7 +701,7 @@ uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const ir::Instruction* phi, return phi->GetSingleWordOperand(operand - 1); } } - + assert(false && "Could not find a phi index matching the provided label"); return 0; } @@ -591,8 +710,8 @@ void LoopUnrollerUtilsImpl::FoldConditionBlock(ir::BasicBlock* condition_block, // Remove the old conditional branch to the merge and continue blocks. ir::Instruction& old_branch = *condition_block->tail(); uint32_t new_target = old_branch.GetSingleWordOperand(operand_label); - context_->KillInst(&old_branch); + context_->KillInst(&old_branch); // Add the new unconditional branch to the merge block. InstructionBuilder builder{context_, condition_block}; builder.AddBranch(new_target); @@ -601,23 +720,35 @@ void LoopUnrollerUtilsImpl::FoldConditionBlock(ir::BasicBlock* condition_block, void LoopUnrollerUtilsImpl::CloseUnrolledLoop(ir::Loop* loop) { // Remove the OpLoopMerge instruction from the function. ir::Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst(); - context_->KillInst(merge_inst); + invalidated_instructions_.push_back(merge_inst); // Remove the final backedge to the header and make it point instead to the // merge block. state_.previous_continue_block_->tail()->SetInOperand( 0, {loop->GetMergeBlock()->id()}); - // Remove the induction variable as the phi will now be invalid. Replace all - // uses with the constant initializer value (all uses of the phi will be in - // the first iteration with the subsequent phis already having been removed. - uint32_t initalizer_id = - GetPhiDefID(loop_induction_variable_, loop->GetPreHeaderBlock()->id()); - context_->ReplaceAllUsesWith(loop_induction_variable_->result_id(), - initalizer_id); + // Remove all induction variables as the phis will now be invalid. Replace all + // uses with the constant initializer value (all uses of phis will be in + // the first iteration with the subsequent phis already having been removed). + std::vector<ir::Instruction*> inductions; + loop->GetInductionVariables(inductions); + + // We can use the state instruction mechanism to replace all internal loop + // values within the first loop trip (as the subsequent ones will be updated + // by the copy function) with the value coming in from the preheader and then + // use context ReplaceAllUsesWith for the uses outside the loop with the final + // trip phi value. + state_.new_inst.clear(); + for (ir::Instruction* induction : inductions) { + uint32_t initalizer_id = + GetPhiDefID(induction, loop->GetPreHeaderBlock()->id()); + + state_.new_inst[induction->result_id()] = initalizer_id; + } - // Remove the now unused phi. - context_->KillInst(loop_induction_variable_); + for (ir::BasicBlock* block : loop_blocks_inorder_) { + RemapOperands(block); + } } // Uses the first loop to create a copy of the loop with new IDs. @@ -631,10 +762,14 @@ void LoopUnrollerUtilsImpl::DuplicateLoop(ir::Loop* old_loop, new_block_order.push_back(blocks_to_add_.back().get()); } + // Clone the merge block, give it a new id and record it in the state. ir::BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_); new_merge->SetParent(old_loop->GetMergeBlock()->GetParent()); AssignNewResultIds(new_merge); state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge; + + // Remap the operands of every instruction in the loop to point to the new + // copies. for (auto& pair : state_.new_blocks) { RemapOperands(pair.second); } @@ -648,12 +783,11 @@ void LoopUnrollerUtilsImpl::DuplicateLoop(ir::Loop* old_loop, new_loop->SetMergeBlock(new_merge); } +// Whenever the utility copies a block it stores it in a tempory buffer, this +// function adds the buffer into the ir::Function. The blocks will be inserted +// after the block |insert_point|. void LoopUnrollerUtilsImpl::AddBlocksToFunction( const ir::BasicBlock* insert_point) { - for (ir::Instruction* inst : dead_instructions_) { - context_->KillInst(inst); - } - for (auto basic_block_iterator = function_.begin(); basic_block_iterator != function_.end(); ++basic_block_iterator) { if (basic_block_iterator->id() == insert_point->id()) { @@ -691,12 +825,12 @@ void LoopUnrollerUtilsImpl::AssignNewResultIds(ir::BasicBlock* basic_block) { // Save the mapping of old_id -> new_id. state_.new_inst[old_id] = inst.result_id(); - // Check if this instruction is the induction variable. if (loop_induction_variable_->result_id() == old_id) { // Save a pointer to the new copy of it. state_.new_phi = &inst; } + state_.ids_to_new_inst[inst.result_id()] = &inst; } } @@ -706,6 +840,7 @@ void LoopUnrollerUtilsImpl::RemapOperands(ir::BasicBlock* basic_block) { for (ir::Instruction& inst : *basic_block) { auto remap_operands_to_new_ids = [this](uint32_t* id) { auto itr = state_.new_inst.find(*id); + if (itr != state_.new_inst.end()) { *id = itr->second; } @@ -719,26 +854,7 @@ void LoopUnrollerUtilsImpl::RemapOperands(ir::BasicBlock* basic_block) { // later use. void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(ir::Loop* loop) { loop_blocks_inorder_.clear(); - - opt::DominatorAnalysis* analysis = - context_->GetDominatorAnalysis(&function_, *context_->cfg()); - opt::DominatorTree& tree = analysis->GetDomTree(); - - // Starting at the loop header BasicBlock, traverse the dominator tree until - // we reach the merge block and add every node we traverse to the set of - // blocks - // which we consider to be the loop. - auto begin_itr = tree.GetTreeNode(loop->GetHeaderBlock())->df_begin(); - const ir::BasicBlock* merge = loop->GetMergeBlock(); - auto func = [merge, &tree, this](DominatorTreeNode* node) { - if (!tree.Dominates(merge->id(), node->id())) { - this->loop_blocks_inorder_.push_back(node->bb_); - return true; - } - return false; - }; - - tree.VisitChildrenIf(func, begin_itr); + loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_); } // Adds the blocks_to_add_ to both the loop and to the parent. @@ -752,6 +868,35 @@ void LoopUnrollerUtilsImpl::AddBlocksToLoop(ir::Loop* loop) const { if (loop->GetParent()) AddBlocksToLoop(loop->GetParent()); } +void LoopUnrollerUtilsImpl::LinkLastPhisToStart(ir::Loop* loop) const { + std::vector<ir::Instruction*> inductions; + loop->GetInductionVariables(inductions); + + for (size_t i = 0; i < inductions.size(); ++i) { + ir::Instruction* last_phi_in_block = state_.previous_phis_[i]; + + uint32_t phi_index = GetPhiIndexFromLabel(state_.previous_continue_block_, + last_phi_in_block); + uint32_t phi_variable = + last_phi_in_block->GetSingleWordInOperand(phi_index - 1); + uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index); + + ir::Instruction* phi = inductions[i]; + phi->SetInOperand(phi_index - 1, {phi_variable}); + phi->SetInOperand(phi_index, {phi_label}); + } +} + +// Duplicate the |loop| body |factor| number of times while keeping the loop +// backedge intact. +void LoopUnrollerUtilsImpl::PartiallyUnroll(ir::Loop* loop, size_t factor) { + Unroll(loop, factor); + LinkLastPhisToStart(loop); + AddBlocksToLoop(loop); + AddBlocksToFunction(loop->GetMergeBlock()); + RemoveDeadInstructions(); +} + /* * End LoopUtilsImpl. */ @@ -775,7 +920,7 @@ bool LoopUtils::CanPerformUnroll() { if (!condition) return false; // Check that we can find and process the induction variable. - const ir::Instruction* induction = loop_->FindInductionVariable(condition); + const ir::Instruction* induction = loop_->FindConditionVariable(condition); if (!induction || induction->opcode() != SpvOpPhi) return false; // Check that we can find the number of loop iterations. @@ -792,15 +937,8 @@ bool LoopUtils::CanPerformUnroll() { return false; } - // Make sure the induction is the only phi instruction we have in the loop - // header. Other optimizations have been seen to leave dead phi nodes in the - // header so we also check that the phi is used. - for (const ir::Instruction& inst : *loop_->GetHeaderBlock()) { - if (inst.opcode() == SpvOpPhi && - inst.result_id() != induction->result_id()) { - return false; - } - } + std::vector<ir::Instruction*> inductions; + loop_->GetInductionVariables(inductions); // Ban breaks within the loop. const std::vector<uint32_t>& merge_block_preds = @@ -832,33 +970,6 @@ bool LoopUtils::CanPerformUnroll() { return false; } - for (uint32_t block_id : loop_->GetBlocks()) { - opt::analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr(); - - ir::BasicBlock& bb = *context_->cfg()->block(block_id); - // For every instruction in the block. - for (ir::Instruction& inst : bb) { - if (inst.result_id() == 0) continue; - - auto is_used_outside_loop = [this, - def_use_manager](ir::Instruction* user) { - - if (!loop_->IsInsideLoop(user)) { - // Some optimization passes have been seen to leave dead phis in the - // IR so we check that if a phi is used outside of the loop that the - // user is not dead. - if (!(user->opcode() == SpvOpPhi && - def_use_manager->NumUsers(user) == 0)) - return false; - } - return true; - }; - - if (!def_use_manager->WhileEachUser(&inst, is_used_outside_loop)) { - return false; - } - } - } return true; } @@ -893,6 +1004,9 @@ bool LoopUtils::PartiallyUnroll(size_t factor) { bool LoopUtils::FullyUnroll() { if (!CanPerformUnroll()) return false; + std::vector<ir::Instruction*> inductions; + loop_->GetInductionVariables(inductions); + LoopUnrollerUtilsImpl unroller{context_, loop_->GetHeaderBlock()->GetParent()}; @@ -926,7 +1040,11 @@ Pass::Status LoopUnroller::Process(ir::IRContext* c) { continue; } - loop_utils.FullyUnroll(); + if (fully_unroll_) { + loop_utils.FullyUnroll(); + } else { + loop_utils.PartiallyUnroll(unroll_factor_); + } changed = true; } LD->PostModificationCleanup(); diff --git a/source/opt/loop_unroller.h b/source/opt/loop_unroller.h index 38dfa347..caf0a8ed 100644 --- a/source/opt/loop_unroller.h +++ b/source/opt/loop_unroller.h @@ -21,7 +21,9 @@ namespace opt { class LoopUnroller : public Pass { public: - LoopUnroller() : Pass() {} + LoopUnroller() : Pass(), fully_unroll_(true), unroll_factor_(0) {} + LoopUnroller(bool fully_unroll, int unroll_factor) + : Pass(), fully_unroll_(fully_unroll), unroll_factor_(unroll_factor) {} const char* name() const override { return "Loop unroller"; } @@ -29,6 +31,8 @@ class LoopUnroller : public Pass { private: ir::IRContext* context_; + bool fully_unroll_; + int unroll_factor_; }; } // namespace opt diff --git a/source/opt/loop_utils.h b/source/opt/loop_utils.h index 3eb20de0..89e69367 100644 --- a/source/opt/loop_utils.h +++ b/source/opt/loop_utils.h @@ -86,7 +86,7 @@ class LoopUtils { // // The conditions checked to ensure the loop can be unrolled are as follows: // 1. That the loop is in structured order. - // 2. That the condinue block is a branch to the header. + // 2. That the continue block is a branch to the header. // 3. That the only phi used in the loop is the induction variable. // TODO(stephen@codeplay.com): This is a temporary mesure, after the loop is // converted into LCSAA form and has a single entry and exit we can rewrite diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp index 0478d3a4..dced5db5 100644 --- a/source/opt/optimizer.cpp +++ b/source/opt/optimizer.cpp @@ -400,8 +400,8 @@ Optimizer::PassToken CreateSimplificationPass() { MakeUnique<opt::SimplificationPass>()); } -Optimizer::PassToken CreateLoopFullyUnrollPass() { +Optimizer::PassToken CreateLoopUnrollPass(bool fully_unroll, int factor) { return MakeUnique<Optimizer::PassToken::Impl>( - MakeUnique<opt::LoopUnroller>()); + MakeUnique<opt::LoopUnroller>(fully_unroll, factor)); } } // namespace spvtools |