1 files changed, 88 insertions, 78 deletions
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
index 372ce689e..e05d36a12 100644
--- a/runtime/onert/backend/acl_common/AclKernelGen.h
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -30,6 +30,20 @@ namespace backend
 namespace acl_common
 {
 
+inline void enableDimCorrection(IACLTensor *tensor) // inline: a non-template function defined in a header needs it to avoid multiple-definition (ODR) link errors
+{ // Re-sets the tensor's last dimension to its current value, passing `true` as the third argument of TensorShape::set().
+  size_t input_rank = tensor->getShape().rank(); // NOTE(review): a rank of 0 would wrap `input_rank - 1` (size_t) - confirm callers never pass a rank-0 tensor
+  const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape()) // const is cast away so the shape owned by the tensor info is modified in place
+    .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true); // value is unchanged; presumably `true` is ACL's apply_dim_correction flag - verify against TensorShape::set
+}
+
+inline void disableDimCorrection(IACLTensor *tensor) // inline: a non-template function defined in a header needs it to avoid multiple-definition (ODR) link errors
+{ // Re-sets the tensor's last dimension to its current value, passing `false` as the third argument of TensorShape::set().
+  size_t input_rank = tensor->getShape().rank(); // NOTE(review): a rank of 0 would wrap `input_rank - 1` (size_t) - confirm callers never pass a rank-0 tensor
+  const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape()) // const is cast away so the shape owned by the tensor info is modified in place
+    .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false); // value is unchanged; presumably `false` turns off ACL's apply_dim_correction - verify against TensorShape::set
+}
+
 template <typename Layer, typename... Args>
 std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
 {
@@ -60,49 +74,49 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
   // TODO Support dynamic rnn
   // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
   const auto scratch_buffer_index{
-      node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+    node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
   const auto output_state_out_index{
-      node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+    node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
   const auto cell_state_out_index{
-      node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+    node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
   const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
 
   const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
   const auto input_to_input_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
   const auto input_to_forget_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
   const auto input_to_cell_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
   const auto input_to_output_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
   const auto recurrent_to_input_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
   const auto recurrent_to_forget_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
   const auto recurrent_to_cell_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
   const auto recurrent_to_output_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
   const auto cell_to_input_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
   const auto cell_to_forget_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
   const auto cell_to_output_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
   const auto input_gate_bias_index{
-      node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+    node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
   const auto forget_gate_bias_index{
-      node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+    node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
   const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
   const auto output_gate_bias_index{
-      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+    node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
   const auto projection_weights_index{
-      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+    node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
   const auto projection_bias_index{
-      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+    node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
   const auto output_state_in_index{
-      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+    node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
   const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
   const auto cell_threshold = node.param().cell_threshold;
   const auto projection_threshold = node.param().projection_threshold;
@@ -110,8 +124,8 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
   bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
                                     operands.at(input_to_input_weights_index).shape().dim(1) != 0;
   bool has_recurrent_to_input_weights =
-      operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
-      operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+    operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+    operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
   bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
   bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
   bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
@@ -138,30 +152,27 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
   const auto projection_clip = projection_threshold;
   assert(cell_clip >= 0.f && projection_clip >= 0.f);
 
-  auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get();
-  auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get();
-  auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get();
-  auto output_tensor = tensor_reg->getAclTensor(output_index).get();
+  auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index);
+  auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index);
+  auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index);
+  auto output_tensor = tensor_reg->getAclTensor(output_index);
 
-  auto input_tensor = tensor_reg->getAclTensor(input_index).get();
+  auto input_tensor = tensor_reg->getAclTensor(input_index);
 
-  auto input_to_forget_weights_tensor =
-      tensor_reg->getAclTensor(input_to_forget_weights_index).get();
-  auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get();
-  auto input_to_output_weights_tensor =
-      tensor_reg->getAclTensor(input_to_output_weights_index).get();
+  auto input_to_forget_weights_tensor = tensor_reg->getAclTensor(input_to_forget_weights_index);
+  auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
+  auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
   auto recurrent_to_forget_weights_tensor =
-      tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get();
-  auto recurrent_to_cell_weights_tensor =
-      tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get();
+    tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
+  auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
   auto recurrent_to_output_weights_tensor =
-      tensor_reg->getAclTensor(recurrent_to_output_weights_index).get();
+    tensor_reg->getAclTensor(recurrent_to_output_weights_index);
 
-  auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get();
-  auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get();
-  auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get();
-  auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get();
-  auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get();
+  auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
+  auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
+  auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index);
+  auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index);
+  auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index);
 
   auto act_info = asActivationLayerInfo(activation);
 
@@ -169,13 +180,13 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
   if (has_cifg_param)
   {
     auto input_to_input_weights_tensor =
-        tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional
+      tensor_reg->getAclTensor(input_to_input_weights_index); // optional
     auto recurrent_to_input_weights_tensor =
-        tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional
+      tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
     auto cell_to_input_weights_handle =
-        has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle()
-                           : nullptr; // optional (non-cifg && peephole)
-    auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional
+      has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
+                         : nullptr; // optional (non-cifg && peephole)
+    auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
     lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
                                 recurrent_to_input_weights_tensor->handle(),
                                 cell_to_input_weights_handle, input_gate_bias_tensor->handle());
@@ -183,32 +194,30 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
   if (has_peephole_param)
   {
     auto cell_to_forget_weights_tensor =
-        tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional
+      tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
     auto cell_to_output_weights_tensor =
-        tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional
+      tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
     lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
                                     cell_to_output_weights_tensor->handle());
   }
   if (has_projection_param)
   {
-    auto projection_weights_tensor =
-        tensor_reg->getAclTensor(projection_weights_index).get(); // optional
-    auto projection_bias_handle =
-        has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle()
-                            : nullptr; // optional
+    auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
+    auto projection_bias_handle = has_projection_bias
+                                    ? tensor_reg->getAclTensor(projection_bias_index)->handle()
+                                    : nullptr; // optional
     lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
   }
 
   auto fn = generateLayer<T_ACLLayer>(
-      input_tensor->handle(), input_to_forget_weights_tensor->handle(),
-      input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
-      recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
-      recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
-      cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
-      output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
-      scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
-      cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
-      projection_clip);
+    input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+    input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+    recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+    recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+    cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(),
+    cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(),
+    output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(),
+    lstm_params, act_info, cell_clip, projection_clip);
 
   return std::make_unique<T_FunctionWrapper>(std::move(fn));
 }
@@ -230,14 +239,14 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
   const auto input_rank = operands.at(input_index).shape().rank();
 
   const auto output_size =
-      operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+    operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
   UNUSED_RELEASE(output_size);
-  assert(operands.at(bias_index).shape().dim(0) == output_size);
+  assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
   assert(operands.at(weight_index).shape().dim(0) == output_size);
   const auto batch_size =
-      operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
+    operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
   const auto input_size =
-      operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
+    operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
 
   // Check for reshaping input's shape into rank-2
   bool needs_reshape = false;
@@ -260,10 +269,10 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
     reshape.dim(1) = input_size; /* W */
   }
 
-  auto output_tensor = tensor_reg->getAclTensor(output_index).get();
-  const auto input_tensor = tensor_reg->getAclTensor(input_index).get();
-  const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get();
-  const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get();
+  auto output_tensor = tensor_reg->getAclTensor(output_index);
+  const auto input_tensor = tensor_reg->getAclTensor(input_index);
+  const auto weight_tensor = tensor_reg->getAclTensor(weight_index);
+  const auto bias_tensor = bias_index.undefined() ? nullptr : tensor_reg->getAclTensor(bias_index);
   const auto frontend_layout = layout;
   const auto acl_layout = output_tensor->handle()->info()->data_layout();
 
@@ -275,9 +284,10 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
   }
 
   auto fn = generateLayer<T_ACLLayer>(
-      tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
-      weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape,
-      asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
+    tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+    weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
+    output_tensor->handle(), needs_reshape,
+    asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
 
   return std::make_unique<T_FunctionWrapper>(std::move(fn));
 }
@@ -298,7 +308,7 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
   const auto kw = node.param().kw;
   const auto stride = node.param().stride;
   const auto padding =
-      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
 
   VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
   VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
@@ -313,12 +323,12 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
   VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
   VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
 
-  auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get();
-  auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get();
+  auto ofm_tensor = tensor_reg->getAclTensor(ofm_index);
+  auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
 
   ::arm_compute::PoolingLayerInfo info{
-      pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
-      asPadStrideInfo(padding, stride), true /* exclude_padding */};
+    pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
+    asPadStrideInfo(padding, stride), true /* exclude_padding */};
 
   auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);