diff options
Diffstat (limited to 'runtime/onert/backend/acl_common/AclKernelGen.h')
-rw-r--r-- | runtime/onert/backend/acl_common/AclKernelGen.h | 166 |
1 files changed, 88 insertions, 78 deletions
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h index 372ce689e..e05d36a12 100644 --- a/runtime/onert/backend/acl_common/AclKernelGen.h +++ b/runtime/onert/backend/acl_common/AclKernelGen.h @@ -30,6 +30,20 @@ namespace backend namespace acl_common { +void enableDimCorrection(IACLTensor *tensor) +{ + size_t input_rank = tensor->getShape().rank(); + const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape()) + .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true); +} + +void disableDimCorrection(IACLTensor *tensor) +{ + size_t input_rank = tensor->getShape().rank(); + const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape()) + .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false); +} + template <typename Layer, typename... Args> std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args) { @@ -60,49 +74,49 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, // TODO Support dynamic rnn // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. const auto scratch_buffer_index{ - node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; + node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; const auto output_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; + node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; const auto cell_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; + node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; const auto input_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional const auto input_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; const auto input_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; const auto input_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; const auto recurrent_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional const auto recurrent_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; const auto recurrent_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; const auto recurrent_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; const auto cell_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional const auto cell_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional const auto cell_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional const auto input_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; + node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; const auto forget_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; + node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; const auto output_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; const auto projection_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional const auto projection_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional const auto output_state_in_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; const auto cell_threshold = node.param().cell_threshold; const auto projection_threshold = node.param().projection_threshold; @@ -110,8 +124,8 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 && operands.at(input_to_input_weights_index).shape().dim(1) != 0; bool has_recurrent_to_input_weights = - operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0; bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0; bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0; bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 && @@ -138,30 +152,27 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, const auto projection_clip = projection_threshold; assert(cell_clip >= 0.f && projection_clip >= 0.f); - auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get(); - auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get(); - auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get(); - auto output_tensor = tensor_reg->getAclTensor(output_index).get(); + auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index); + auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index); + auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index); + auto output_tensor = tensor_reg->getAclTensor(output_index); - auto input_tensor = tensor_reg->getAclTensor(input_index).get(); + auto input_tensor = tensor_reg->getAclTensor(input_index); - auto input_to_forget_weights_tensor = - tensor_reg->getAclTensor(input_to_forget_weights_index).get(); - auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get(); - auto input_to_output_weights_tensor = - tensor_reg->getAclTensor(input_to_output_weights_index).get(); + auto input_to_forget_weights_tensor = tensor_reg->getAclTensor(input_to_forget_weights_index); + auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index); + auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index); auto recurrent_to_forget_weights_tensor = - tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_tensor = - tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_forget_weights_index); + auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index); auto recurrent_to_output_weights_tensor = - tensor_reg->getAclTensor(recurrent_to_output_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_output_weights_index); - auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get(); - auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get(); - auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get(); - auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get(); - auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get(); + auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index); + auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index); + auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index); + auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index); + auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index); auto act_info = asActivationLayerInfo(activation); @@ -169,13 +180,13 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, if (has_cifg_param) { auto input_to_input_weights_tensor = - tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(input_to_input_weights_index); // optional auto recurrent_to_input_weights_tensor = - tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional auto cell_to_input_weights_handle = - has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle() - : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional + has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle() + : nullptr; // optional (non-cifg && peephole) + auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(), recurrent_to_input_weights_tensor->handle(), cell_to_input_weights_handle, input_gate_bias_tensor->handle()); @@ -183,32 +194,30 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, if (has_peephole_param) { auto cell_to_forget_weights_tensor = - tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional auto cell_to_output_weights_tensor = - tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_output_weights_index); // optional lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(), cell_to_output_weights_tensor->handle()); } if (has_projection_param) { - auto projection_weights_tensor = - tensor_reg->getAclTensor(projection_weights_index).get(); // optional - auto projection_bias_handle = - has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle() - : nullptr; // optional + auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional + auto projection_bias_handle = has_projection_bias + ? tensor_reg->getAclTensor(projection_bias_index)->handle() + : nullptr; // optional lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle); } auto fn = generateLayer<T_ACLLayer>( - input_tensor->handle(), input_to_forget_weights_tensor->handle(), - input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), - recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(), - recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), - cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), - output_state_in_tensor->handle(), cell_state_in_tensor->handle(), - scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), - cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip, - projection_clip); + input_tensor->handle(), input_to_forget_weights_tensor->handle(), + input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), + recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(), + recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), + cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(), + cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(), + output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(), + lstm_params, act_info, cell_clip, projection_clip); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } @@ -230,14 +239,14 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope const auto input_rank = operands.at(input_index).shape().rank(); const auto output_size = - operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1); + operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1); UNUSED_RELEASE(output_size); - assert(operands.at(bias_index).shape().dim(0) == output_size); + assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size); assert(operands.at(weight_index).shape().dim(0) == output_size); const auto batch_size = - operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2); + operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2); const auto input_size = - operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1); + operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1); // Check for reshaping input's shape into rank-2 bool needs_reshape = false; @@ -260,10 +269,10 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope reshape.dim(1) = input_size; /* W */ } - auto output_tensor = tensor_reg->getAclTensor(output_index).get(); - const auto input_tensor = tensor_reg->getAclTensor(input_index).get(); - const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get(); - const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index); + const auto input_tensor = tensor_reg->getAclTensor(input_index); + const auto weight_tensor = tensor_reg->getAclTensor(weight_index); + const auto bias_tensor = bias_index.undefined() ? nullptr : tensor_reg->getAclTensor(bias_index); const auto frontend_layout = layout; const auto acl_layout = output_tensor->handle()->info()->data_layout(); @@ -275,9 +284,10 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope } auto fn = generateLayer<T_ACLLayer>( - tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), - weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape, - asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type); + tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr, + output_tensor->handle(), needs_reshape, + asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } @@ -298,7 +308,7 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, const auto kw = node.param().kw; const auto stride = node.param().stride; const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl; VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl; @@ -313,12 +323,12 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl; VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl; - auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get(); - auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get(); + auto ofm_tensor = tensor_reg->getAclTensor(ofm_index); + auto ifm_tensor = tensor_reg->getAclTensor(ifm_index); ::arm_compute::PoolingLayerInfo info{ - pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(), - asPadStrideInfo(padding, stride), true /* exclude_padding */}; + pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(), + asPadStrideInfo(padding, stride), true /* exclude_padding */}; auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info); |