summary | refs | log | tree | commit | diff
path: root/runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc')
-rw-r--r--runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc165
1 files changed, 165 insertions, 0 deletions
diff --git a/runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc b/runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc
new file mode 100644
index 000000000..aa1fd9aed
--- /dev/null
+++ b/runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include "util/feature/nchw/View.h"
+#include "util/logging.h"
+
+namespace
+{
+
+inline bool matchSizeExceptAxis(const ::neurun::backend::acl_cl::operand::ICLTensor *t1,
+ const ::neurun::backend::acl_cl::operand::ICLTensor *t2,
+ uint32_t axis)
+{
+ assert(t1->num_dimensions() <= 4);
+ assert(t2->num_dimensions() <= 4);
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (axis == i)
+ continue;
+ if (t1->info()->dimension(i) != t2->info()->dimension(i))
+ return false;
+ }
+ return true;
+}
+
+} // namespace {anonymous}
+
+namespace neurun
+{
+namespace backend
+{
+namespace acl_cl
+{
+namespace kernel
+{
+
// Default-construct with no tensors bound yet; axis 0 and F32 are
// placeholders overwritten by configure() before run() is called.
ConcatLayer::ConcatLayer()
    : _input_allocs(), _output_alloc(nullptr), _axis(0), _input_type(arm_compute::DataType::F32)
{
  // DO NOTHING -- all real initialization happens in configure().
}
+
+template <typename T> bool ConcatLayer::concatenate()
+{
+ // Input and output size check
+ {
+ // NOTE Support only tensor with dimension 4 or less
+
+ uint32_t axis_sum = 0;
+
+ for (auto input : _input_allocs)
+ {
+ assert(_output_alloc->ptr()->layout() == input->ptr()->layout());
+ assert(matchSizeExceptAxis(_output_alloc->ptr(), input->ptr(), _axis));
+ axis_sum += input->ptr()->info()->dimension(_axis);
+ }
+
+ assert(_output_alloc->ptr()->info()->dimension(_axis) == axis_sum);
+ }
+
+ VERBOSE(Concat_RUN) << "START Concat" << std::endl;
+
+ // Perform operation
+ {
+ uint32_t axis_offset = 0;
+
+ auto outout_fn = [&](::neurun::backend::operand::ITensor &out_tensor) {
+ for (auto input : _input_allocs)
+ {
+ auto &out_cl_tensor =
+ static_cast<::neurun::backend::acl_cl::operand::ICLTensor &>(out_tensor);
+ auto input_fn = [&](::neurun::backend::operand::ITensor &in_tensor) {
+ auto &in_cl_tensor =
+ static_cast<::neurun::backend::acl_cl::operand::ICLTensor &>(in_tensor);
+ for (uint32_t i = 0; i < in_cl_tensor.info()->dimension(0); i++)
+ {
+ for (uint32_t j = 0; j < in_cl_tensor.info()->dimension(1); j++)
+ {
+ for (uint32_t k = 0; k < in_cl_tensor.info()->dimension(2); k++)
+ {
+ for (uint32_t l = 0; l < in_cl_tensor.info()->dimension(3); l++)
+ {
+ int32_t io = (_axis == 0) ? axis_offset : 0;
+ int32_t jo = (_axis == 1) ? axis_offset : 0;
+ int32_t ko = (_axis == 2) ? axis_offset : 0;
+ int32_t lo = (_axis == 3) ? axis_offset : 0;
+ T value =
+ *reinterpret_cast<T *>(in_cl_tensor.handle()->ptr_to_element({i, j, k, l}));
+ *reinterpret_cast<T *>(out_cl_tensor.handle()->ptr_to_element(
+ {i + io, j + jo, k + ko, l + lo})) = value;
+ }
+ }
+ }
+ }
+ if (_axis == 0)
+ axis_offset += in_cl_tensor.info()->dimension(0);
+ if (_axis == 1)
+ axis_offset += in_cl_tensor.info()->dimension(1);
+ if (_axis == 2)
+ axis_offset += in_cl_tensor.info()->dimension(2);
+ if (_axis == 3)
+ axis_offset += in_cl_tensor.info()->dimension(3);
+ };
+ input->access(input_fn);
+ }
+ };
+ _output_alloc->access(outout_fn);
+ }
+
+ VERBOSE(Concat_RUN) << "End Concat" << std::endl;
+
+ return true;
+}
+
+void ConcatLayer::configure(
+ const std::vector<::neurun::backend::acl_cl::operand::Object *> &input_allocs, int32_t axis,
+ ::neurun::backend::acl_cl::operand::Object *output_alloc)
+{
+ _input_allocs = input_allocs;
+ _output_alloc = output_alloc;
+
+ assert(axis < 4);
+
+ // TODO Handle when axis is negative
+ assert(axis >= 0);
+
+ _axis = axis;
+
+ _input_type = input_allocs[0]->ptr()->data_type();
+}
+
+void ConcatLayer::run()
+{
+ if (_input_type == arm_compute::DataType::F32)
+ {
+ concatenate<float>();
+ }
+ else if (_input_type == arm_compute::DataType::QASYMM8)
+ {
+ concatenate<uint8_t>();
+ }
+}
+
+} // namespace kernel
+} // namespace acl_cl
+} // namespace backend
+} // namespace neurun