1 files changed, 343 insertions, 0 deletions
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
new file mode 100644
index 000000000..3d0813f81
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
+#define __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
+
+#include <exec/IFunction.h>
+#include <ir/Operands.h>
+
+#include <ir/operation/LSTM.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Re-sets the last dimension of `tensor`'s shape to its current value and forwards
+// `apply_dim_correction` to arm_compute::TensorShape::set. A rank-0 tensor is not handled.
+inline void setLastDimCorrection(IACLTensor *tensor, bool apply_dim_correction)
+{
+  const size_t last_dim = tensor->num_dimensions() - 1;
+  auto &shape = const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape());
+  shape.set(last_dim, tensor->info()->dimension(last_dim), apply_dim_correction);
+}
+
+// Both wrappers are `inline`: this header defines them, and a plain definition in every
+// including translation unit would violate the one-definition rule when linking.
+inline void enableDimCorrection(IACLTensor *tensor) { setLastDimCorrection(tensor, true); }
+inline void disableDimCorrection(IACLTensor *tensor) { setLastDimCorrection(tensor, false); }
+
+// Default-constructs a `Layer`, configures it with the forwarded arguments and returns it
+// through the arm_compute::IFunction interface.
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
+{
+  auto layer = std::make_unique<Layer>();
+  layer->configure(std::forward<Args>(args)...);
+  return layer;
+}
+
+// Overload for layers that are constructed from a memory manager: builds
+// `Layer(memory_manager)` and configures it with the remaining forwarded arguments.
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction>
+generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
+{
+  auto layer = std::make_unique<Layer>(memory_manager);
+  layer->configure(std::forward<Args>(args)...);
+  return layer;
+}
+
+/**
+ * @brief Generates the ACL LSTM kernel for `node`, wrapped in `T_FunctionWrapper`.
+ *
+ * Reads every operand index from `node`, derives from the operand shapes which optional
+ * inputs are present, fills arm_compute::LSTMParams accordingly and configures a
+ * `T_ACLLayer` through generateLayer(). Three optional parameter groups are handled:
+ *  - CIFG parameters, set when input_to_input and recurrent_to_input weights both have data
+ *  - peephole parameters, set when cell_to_forget and cell_to_output weights both have data
+ *  - projection parameters, set when the projection weights have data
+ *
+ * @tparam T_FunctionWrapper Wrapper type constructed from the configured layer
+ * @tparam T_Tensor          Tensor type used to instantiate arm_compute::LSTMParams
+ * @tparam T_ACLLayer        ACL layer type that is created and configured
+ * @tparam T_TensorRegistry  Registry type providing getAclTensor()
+ * @param[in] node       LSTM operation supplying operand indices and parameters
+ * @param[in] operands   Operands whose shapes tell whether the optional inputs exist
+ * @param[in] tensor_reg Registry used to look up the ACL tensor of each operand
+ * @return The configured layer wrapped in `T_FunctionWrapper`
+ *
+ * @note An assertion requires both clipping thresholds to be non-negative.
+ */
+template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+          typename T_TensorRegistry>
+std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
+                                               const ir::Operands &operands,
+                                               const std::shared_ptr<T_TensorRegistry> &tensor_reg)
+{
+  // TODO Support dynamic rnn
+  // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
+  using LSTM = ir::operation::LSTM;
+  const auto &outputs = node.getOutputs();
+  const auto &inputs = node.getInputs();
+
+  // Resolves an operand index to the handle of its registered ACL tensor.
+  const auto handle_of = [&tensor_reg](const auto &index) {
+    return tensor_reg->getAclTensor(index)->handle();
+  };
+  // Tells whether dimension `axis` of the operand at `index` is non-zero. An optional input
+  // is considered to have data only when the dimensions inspected below are non-zero.
+  const auto has_dim = [&operands](const auto &index, int axis) {
+    return operands.at(index).shape().dim(axis) != 0;
+  };
+
+  // Output operand indices
+  const auto scratch_buffer_idx = outputs.at(LSTM::Output::SCRATCH_BUFFER);
+  const auto output_state_out_idx = outputs.at(LSTM::Output::OUTPUT_STATE_OUT);
+  const auto cell_state_out_idx = outputs.at(LSTM::Output::CELL_STATE_OUT);
+  const auto output_idx = outputs.at(LSTM::Output::OUTPUT);
+
+  // Input operand indices
+  const auto input_idx = inputs.at(LSTM::Input::INPUT);
+  // Input-to-gate weights (input_to_input_weights is optional)
+  const auto input_to_input_weights_idx = inputs.at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS);
+  const auto input_to_forget_weights_idx = inputs.at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS);
+  const auto input_to_cell_weights_idx = inputs.at(LSTM::Input::INPUT_TO_CELL_WEIGHTS);
+  const auto input_to_output_weights_idx = inputs.at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS);
+  // Recurrent-to-gate weights (recurrent_to_input_weights is optional)
+  const auto recurrent_to_input_weights_idx = inputs.at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS);
+  const auto recurrent_to_forget_weights_idx = inputs.at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS);
+  const auto recurrent_to_cell_weights_idx = inputs.at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS);
+  const auto recurrent_to_output_weights_idx = inputs.at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS);
+  // Cell-to-gate weights used by peephole (all optional)
+  const auto cell_to_input_weights_idx = inputs.at(LSTM::Input::CELL_TO_INPUT_WEIGHTS);
+  const auto cell_to_forget_weights_idx = inputs.at(LSTM::Input::CELL_TO_FORGET_WEIGHTS);
+  const auto cell_to_output_weights_idx = inputs.at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS);
+  // Gate biases
+  const auto input_gate_bias_idx = inputs.at(LSTM::Input::INPUT_GATE_BIAS);
+  const auto forget_gate_bias_idx = inputs.at(LSTM::Input::FORGET_GATE_BIAS);
+  const auto cell_bias_idx = inputs.at(LSTM::Input::CELL_BIAS);
+  const auto output_gate_bias_idx = inputs.at(LSTM::Input::OUTPUT_GATE_BIAS);
+  // Projection weights and bias (both optional)
+  const auto projection_weights_idx = inputs.at(LSTM::Input::PROJECTION_WEIGHTS);
+  const auto projection_bias_idx = inputs.at(LSTM::Input::PROJECTION_BIAS);
+  // Incoming output state and cell state
+  const auto output_state_in_idx = inputs.at(LSTM::Input::OUTPUT_STATE_IN);
+  const auto cell_state_in_idx = inputs.at(LSTM::Input::CELL_STATE_IN);
+
+  // Presence of the optional inputs, judged by their operand shapes
+  const bool has_input_to_input_weights =
+      has_dim(input_to_input_weights_idx, 0) && has_dim(input_to_input_weights_idx, 1);
+  const bool has_recurrent_to_input_weights =
+      has_dim(recurrent_to_input_weights_idx, 0) && has_dim(recurrent_to_input_weights_idx, 1);
+  const bool has_cell_to_forget_weights = has_dim(cell_to_forget_weights_idx, 0);
+  const bool has_cell_to_output_weights = has_dim(cell_to_output_weights_idx, 0);
+  const bool has_projection_weights =
+      has_dim(projection_weights_idx, 0) && has_dim(projection_weights_idx, 1);
+  const bool has_projection_bias = has_dim(projection_bias_idx, 0);
+
+  // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
+  // true: no CIFG
+  // false: CIFG
+  // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
+  const bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+  // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
+  // But the cell_to_input_weights does not exist in regular CIFG although peephole.
+  // true: peephole
+  // false: no peephole
+  const bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+  // NOTE Although the projection weights has data the projection bias may not have data.
+  const bool has_projection_param = has_projection_weights;
+
+  // Activation info and clipping thresholds taken from the node parameters
+  auto act_info = asActivationLayerInfo(node.param().activation);
+  const auto cell_clip = node.param().cell_threshold;
+  const auto projection_clip = node.param().projection_threshold;
+  assert(cell_clip >= 0.f && projection_clip >= 0.f);
+
+  // Only the optional parameter groups that are present are registered in lstm_params.
+  ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
+  if (has_cifg_param)
+  {
+    // optional (non-cifg && peephole)
+    auto cell_to_input_weights_handle =
+        has_peephole_param ? handle_of(cell_to_input_weights_idx) : nullptr;
+    lstm_params.set_cifg_params(handle_of(input_to_input_weights_idx),
+                                handle_of(recurrent_to_input_weights_idx),
+                                cell_to_input_weights_handle,
+                                handle_of(input_gate_bias_idx));
+  }
+
+  if (has_peephole_param)
+  {
+    // cell_to_input_weights is not passed here; it is part of the CIFG parameters above.
+    lstm_params.set_peephole_params(handle_of(cell_to_forget_weights_idx),
+                                    handle_of(cell_to_output_weights_idx));
+  }
+
+  if (has_projection_param)
+  {
+    // The bias stays nullptr when only the projection weights have data.
+    auto projection_bias_handle = has_projection_bias ? handle_of(projection_bias_idx) : nullptr;
+    lstm_params.set_projection_params(handle_of(projection_weights_idx), projection_bias_handle);
+  }
+
+  // generateLayer() forwards these arguments, in this order, to T_ACLLayer::configure().
+  auto fn = generateLayer<T_ACLLayer>(
+      // input and input-to-gate weights
+      handle_of(input_idx), handle_of(input_to_forget_weights_idx),
+      handle_of(input_to_cell_weights_idx), handle_of(input_to_output_weights_idx),
+      // recurrent-to-gate weights
+      handle_of(recurrent_to_forget_weights_idx), handle_of(recurrent_to_cell_weights_idx),
+      handle_of(recurrent_to_output_weights_idx),
+      // gate biases
+      handle_of(forget_gate_bias_idx), handle_of(cell_bias_idx), handle_of(output_gate_bias_idx),
+      // incoming output state and cell state
+      handle_of(output_state_in_idx), handle_of(cell_state_in_idx),
+      // outputs
+      handle_of(scratch_buffer_idx), handle_of(output_state_out_idx),
+      handle_of(cell_state_out_idx), handle_of(output_idx),
+      // optional parameters, activation info and clipping thresholds
+      lstm_params, act_info, cell_clip, projection_clip);
+
+  // Ownership of the configured layer moves into the wrapper.
+  return std::make_unique<T_FunctionWrapper>(std::move(fn));
+}
+
+/**
+ * @brief Generates the ACL fully-connected kernel for `node`, wrapped in `T_FunctionWrapper`.
+ *
+ * An input of rank 3 or 4 is flagged for reshaping to {batch_size, input_size}: batch_size is
+ * the second-to-last output dimension and input_size the last weight dimension. Constant
+ * weights select KernelType::PREPROCESSED_WEIGHTS, any other weights KernelType::GENERAL.
+ * The layer is created with tensor_builder's internal buffer manager, tensors are looked up
+ * in `tensor_reg`, and `layout` is the frontend layout used to convert the reshape shape.
+ */
+template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+          typename T_TensorBuilder, typename T_TensorRegistry>
+std::unique_ptr<exec::IFunction>
+kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
+                        const std::shared_ptr<T_TensorBuilder> &tensor_builder,
+                        const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout)
+{
+  using ir::operation::FullyConnected;
+
+  const auto output_index = node.getOutputs().at(0);
+  const auto input_index = node.getInputs().at(FullyConnected::Input::INPUT);
+  const auto weight_index = node.getInputs().at(FullyConnected::Input::WEIGHT);
+  const auto bias_index = node.getInputs().at(FullyConnected::Input::BIAS);
+  const auto &ifm_shape = operands.at(input_index).shape();
+  const auto &ofm_shape = operands.at(output_index).shape();
+  const auto &weight_shape = operands.at(weight_index).shape();
+
+  const auto output_size = ofm_shape.dim(ofm_shape.rank() - 1);
+  UNUSED_RELEASE(output_size);
+  assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
+  assert(weight_shape.dim(0) == output_size);
+  const auto batch_size = ofm_shape.dim(ofm_shape.rank() - 2);
+  const auto input_size = weight_shape.dim(weight_shape.rank() - 1);
+
+  // Check for reshaping input's shape into rank-2
+  const auto input_rank = ifm_shape.rank();
+  bool needs_reshape = (input_rank == 3 || input_rank == 4);
+  ir::Shape reshape(2);
+  if (needs_reshape)
+  {
+    int feature_size = 1;
+    for (int axis = 0; axis < input_rank; ++axis)
+    {
+      feature_size *= ifm_shape.dim(axis);
+    }
+    UNUSED_RELEASE(feature_size);
+    assert(feature_size == batch_size * input_size);
+    reshape.dim(0) = batch_size; /* H */
+    reshape.dim(1) = input_size; /* W */
+  }
+
+  auto output_tensor = tensor_reg->getAclTensor(output_index);
+  const auto input_tensor = tensor_reg->getAclTensor(input_index);
+  const auto weight_tensor = tensor_reg->getAclTensor(weight_index);
+  const auto bias_tensor = bias_index.undefined() ? nullptr : tensor_reg->getAclTensor(bias_index);
+  const auto acl_layout = output_tensor->handle()->info()->data_layout();
+
+  // Constant weights are asserted to carry data and select the PREPROCESSED_WEIGHTS kernel.
+  const bool weights_are_constant = operands.at(weight_index).isConstant();
+  assert(!weights_are_constant || operands.at(weight_index).data());
+  auto kernel_type = weights_are_constant ? T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS
+                                          : T_ACLLayer::KernelType::GENERAL;
+
+  auto fn = generateLayer<T_ACLLayer>(
+      tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
+      output_tensor->handle(), needs_reshape,
+      asTensorShape(reshape, layout, asRuntimeLayout(acl_layout)), kernel_type);
+  return std::make_unique<T_FunctionWrapper>(std::move(fn));
+}
+
+/**
+ * @brief Creates an ACL pooling layer of `pooling_type` for `node` and configures it.
+ *
+ * Padding is computed from the node parameters and the feature shapes of the first input and
+ * output under `layout`. The pooling info is built with exclude_padding set to true.
+ */
+template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry>
+std::unique_ptr<::arm_compute::IFunction>
+kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
+                const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout,
+                ::arm_compute::PoolingType pooling_type)
+{
+  const auto ofm_index = node.getOutputs().at(0);
+  const auto ifm_index = node.getInputs().at(0);
+  const auto ofm_shape = operands.at(ofm_index).shape().asFeature(layout);
+  const auto ifm_shape = operands.at(ifm_index).shape().asFeature(layout);
+  const auto kh = node.param().kh;
+  const auto kw = node.param().kw;
+  const auto stride = node.param().stride;
+  const auto padding =
+      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+
+  VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
+  VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
+  VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl;
+  VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl;
+  VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl;
+  VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl;
+  VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl;
+  VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl;
+  VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl;
+  VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl;
+  VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
+  VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
+
+  auto output_tensor = tensor_reg->getAclTensor(ofm_index);
+  auto input_tensor = tensor_reg->getAclTensor(ifm_index);
+  ::arm_compute::PoolingLayerInfo pool_info{
+      pooling_type, ::arm_compute::Size2D{kw, kh}, input_tensor->info()->data_layout(),
+      asPadStrideInfo(padding, stride), true /* exclude_padding */};
+  return generateLayer<T_ACLLayer>(input_tensor->handle(), output_tensor->handle(), pool_info);
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_