1 files changed, 140 insertions, 0 deletions
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc
new file mode 100644
index 000000000..7f8ae2505
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleHashtableLookupLayer.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+// Stores the tensors used by run() and sizes the lookup-index cache (-1 marks "no match").
+void SimpleHashtableLookupLayer::configure(
+    ::arm_compute::ITensor *lookups, ::arm_compute::ITensor *keys, ::arm_compute::ITensor *values,
+    ::arm_compute::ITensor *output, ::arm_compute::ITensor *hits)
+{
+  _lookups = lookups;
+  _keys = keys;
+  _values = values;
+  _output = output;
+  _hits = hits;
+  // assign(), unlike resize(), also resets entries left over from an earlier configure().
+  _lookup_indices.assign(lookups->info()->dimension(0), -1);
+}
+
+// Performs the hashtable lookup. For each entry of _lookups the matching key is searched in
+// _keys; on a match the corresponding slice of _values is copied to _output and the entry's
+// flag in _hits is set to 1, otherwise the slice is zero-filled and the flag is set to 0.
+// Throws std::runtime_error if a matched key position is outside the outermost _values dim.
+void SimpleHashtableLookupLayer::run()
+{
+  auto &queue = ::arm_compute::CLScheduler::get().queue();
+  const bool gpu_mode = ::internal::arm_compute::isGpuMode();
+  if (gpu_mode)
+  {
+    CAST_CL(_lookups)->map(queue);
+    CAST_CL(_keys)->map(queue);
+    CAST_CL(_values)->map(queue);
+    CAST_CL(_output)->map(queue);
+    CAST_CL(_hits)->map(queue);
+  }
+
+  // Used by both the error path and the normal exit so mapped tensors are always unmapped.
+  auto unmap_all = [&]() {
+    if (!gpu_mode)
+      return;
+    CAST_CL(_lookups)->unmap(queue);
+    CAST_CL(_keys)->unmap(queue);
+    CAST_CL(_values)->unmap(queue);
+    CAST_CL(_output)->unmap(queue);
+    CAST_CL(_hits)->unmap(queue);
+  };
+
+  const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer());
+  const int32_t *keys_buf = reinterpret_cast<int32_t *>(_keys->buffer());
+  uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer());
+
+  const auto values_info = _values->info();
+  const auto output_info = _output->info();
+
+  // NOTE The first dimension's position must be always at the end of dimensions.
+  const auto first_dim_pos = values_info->num_dimensions() - 1;
+  const size_t first_dim = values_info->dimension(first_dim_pos);
+
+  // Maps every key to its position in _keys; for duplicated keys the last position wins.
+  std::map<int32_t, size_t> key_map;
+  const size_t keys_num = _keys->info()->dimension(0);
+  for (size_t key_index = 0; key_index < keys_num; key_index++)
+  {
+    key_map[keys_buf[key_index]] = key_index;
+  }
+
+  const size_t lookups_num = _lookups->info()->dimension(0);
+  for (size_t i = 0; i < lookups_num; ++i)
+  {
+    const auto found = key_map.find(lookups_buf[i]);
+    if (found != key_map.end() && found->second >= first_dim)
+    {
+      unmap_all();
+      throw std::runtime_error("HashTable Lookup: index out of bounds.");
+    }
+    // _lookup_indices persists across calls, so a miss must overwrite a hit from an earlier run.
+    _lookup_indices[i] = (found != key_map.end()) ? static_cast<int>(found->second) : -1;
+  }
+
+  // Differing strides mean the two tensors are padded differently, so a whole slice cannot be
+  // copied at once.
+  auto can_copy_at_once = [&]() -> bool {
+    const auto &values_strides = values_info->strides_in_bytes();
+    const auto &output_strides = output_info->strides_in_bytes();
+    for (size_t i = 0; i < first_dim_pos; ++i)
+    {
+      if (values_strides[i] != output_strides[i])
+        return false;
+    }
+    return true;
+  };
+
+  size_t copy_bytes = 0;
+  ::arm_compute::Window window;
+  if (can_copy_at_once())
+  {
+    copy_bytes = values_info->total_size() / first_dim;
+    window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+  }
+  else
+  {
+    copy_bytes = values_info->dimension(0) * values_info->element_size();
+    window.use_tensor_dimensions(output_info->tensor_shape(), ::arm_compute::Window::DimY);
+  }
+
+  ::arm_compute::Iterator it(_output, window);
+  execute_window_loop(window,
+                      [&](const ::arm_compute::Coordinates &id) {
+                        const int idx = id[first_dim_pos];
+                        const int lookup_index = _lookup_indices[idx];
+                        // Hits are stored per lookup position, never per key position.
+                        hits_buf[idx] = (lookup_index >= 0) ? 1 : 0;
+                        if (lookup_index < 0)
+                        {
+                          memset(it.ptr(), 0, copy_bytes);
+                          return;
+                        }
+                        ::arm_compute::Coordinates values_id = id;
+                        values_id.set(first_dim_pos, lookup_index);
+                        memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+                      },
+                      it);
+
+  unmap_all();
+}