summary | refs | log | tree | commit | diff
path: root/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc')
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc 137
1 files changed, 65 insertions, 72 deletions
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
index 089c783c1..ae740bb10 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
@@ -1,3 +1,18 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#include "internal/layers/SimpleEmbeddingLookup.h"
#include <arm_compute/runtime/CL/CLScheduler.h>
@@ -6,7 +21,8 @@ void SimpleEmbeddingLookup::configure(::arm_compute::ITensor *lookups,
::arm_compute::ITensor *values,
::arm_compute::ITensor *output)
{
- // Assume that verification of operands are already done at Planner::visit()
+ assert(values->info()->num_dimensions() == output->info()->num_dimensions());
+ assert(values->info()->num_dimensions() > 1 && values->info()->num_dimensions() <= 4);
_lookups = lookups;
_values = values;
_output = output;
@@ -25,85 +41,62 @@ void SimpleEmbeddingLookup::run()
// type of elements of lookups is always integer
const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer());
- const auto values_buf = _values->buffer();
- auto output_buf = _output->buffer();
const auto lookups_info = _lookups->info();
const auto values_info = _values->info();
const auto output_info = _output->info();
- // TODO Refactor below duplicated code!
- const auto values_rank = values_info->num_dimensions();
- switch (values_rank)
+ // NOTE The first dimension's position is always at the end of dimensions.
+ const auto first_dim_pos = values_info->num_dimensions() - 1;
+
+ const size_t first_dim = values_info->dimension(first_dim_pos);
+ for (size_t i = 0; i < lookups_info->dimension(0); ++i)
{
- case 2:
- // (H,W) in nnapi -> (W,H) in acl
- {
- const size_t row_size = values_info->dimension(1);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 3:
- // (B,H,W) in nnapi -> (W,H,B) in acl
- {
- const size_t row_size = values_info->dimension(2);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 4:
- // (N,H,W,C) in nnapi -> (N,C,H,W) in acl
- {
- const size_t row_size = values_info->dimension(3);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, 0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, 0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 1:
- // In this case, shape of values actually is matrix but the height(row size) is 1 in acl. If
- // row size is 1, this op is not needed and it means this situtation could be wrong.
- throw std::runtime_error("Wrong usage of EmbeddingLookup op!");
- default:
- throw std::runtime_error("Not supported rank!");
+ if (lookups_buf[i] < 0 || lookups_buf[i] >= first_dim)
+ throw std::runtime_error("Embedding Lookup: index out of bounds.");
}
+  // A whole slice can be copied with one memcpy only when values and output have the same byte
+  // stride in every dimension below the first one; differing strides mean differing padding.
+  auto can_copy_at_once = [&]() -> bool {
+    const auto &values_strides = values_info->strides_in_bytes();
+    const auto &output_strides = output_info->strides_in_bytes();
+
+    for (size_t i = 0; i < first_dim_pos; ++i)
+    {
+      if (values_strides[i] != output_strides[i])
+        return false;
+    }
+
+    return true;
+  };
+
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ size_t copy_bytes;
+ Window window;
+ if (can_copy_at_once())
+ {
+ copy_bytes = values_info->total_size() / first_dim;
+ window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+ }
+ else
+ {
+ copy_bytes = values_info->dimension(0) * values_info->element_size();
+ window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+ }
+
+ Iterator it(_output, window);
+ execute_window_loop(window,
+ [&](const ::arm_compute::Coordinates &id) {
+ ::arm_compute::Coordinates values_id = id;
+ const int idx = id[first_dim_pos];
+ values_id.set(first_dim_pos, lookups_buf[idx]);
+ memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+ },
+ it);
+
if (::internal::arm_compute::isGpuMode())
{
auto &q = ::arm_compute::CLScheduler::get().queue();