diff options
Diffstat (limited to 'runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc')
-rw-r--r-- | runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc new file mode 100644 index 000000000..7f8ae2505 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleHashtableLookupLayer.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleHashtableLookupLayer::configure(::arm_compute::ITensor *lookups, + ::arm_compute::ITensor *keys, + ::arm_compute::ITensor *values, + ::arm_compute::ITensor *output, + ::arm_compute::ITensor *hits) +{ + _lookups = lookups; + _keys = keys; + _values = values; + _output = output; + _hits = hits; + _lookup_indices.resize(lookups->info()->dimension(0), -1); +} + +void SimpleHashtableLookupLayer::run() +{ + auto &queue = ::arm_compute::CLScheduler::get().queue(); + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_lookups)->map(queue); + CAST_CL(_keys)->map(queue); + CAST_CL(_values)->map(queue); + CAST_CL(_output)->map(queue); + CAST_CL(_hits)->map(queue); + } + + const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer()); + const int32_t *keys_buf = reinterpret_cast<int32_t *>(_keys->buffer()); + uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer()); + + const auto lookups_info = _lookups->info(); + const auto values_info = _values->info(); + const auto keys_info = _keys->info(); + const auto output_info = _output->info(); + + // NOTE The first dimension's position must be always at the end of dimensions. + const auto first_dim_pos = values_info->num_dimensions() - 1; + const size_t first_dim = values_info->dimension(first_dim_pos); + + std::map<int32_t, size_t> key_map; + const int keys_num = keys_info->dimension(0); + for (size_t key_index = 0; key_index < keys_num; key_index++) + { + key_map[keys_buf[key_index]] = key_index; + } + + const int lookups_num = lookups_info->dimension(0); + for (size_t i = 0; i < lookups_num; ++i) + { + const auto lookup_value = lookups_buf[i]; + const auto it = key_map.find(lookup_value); + if (it != key_map.end()) + { + if (it->second >= first_dim) + throw std::runtime_error("HashTable Lookup: index out of bounds."); + _lookup_indices[i] = it->second; + } + } + + // If each strides of values and output are different, applied padding size of the two tensors are + // different, therefore, it can not be copied at once. + auto can_copy_at_once = [&]() -> bool { + const auto &values_strides = values_info->strides_in_bytes(); + const auto &output_strides = output_info->strides_in_bytes(); + + for (size_t i = 0; i < first_dim_pos; ++i) + { + if (values_strides[i] != values_strides[i]) + return false; + } + + return true; + }; + + using ::arm_compute::Window; + using ::arm_compute::Iterator; + using ::arm_compute::Coordinates; + + size_t copy_bytes; + Window window; + if (can_copy_at_once()) + { + copy_bytes = values_info->total_size() / first_dim; + window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos); + } + else + { + copy_bytes = values_info->dimension(0) * values_info->element_size(); + window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY); + } + + Iterator it(_output, window); + execute_window_loop(window, + [&](const Coordinates &id) { + Coordinates values_id = id; + const int idx = id[first_dim_pos]; + const int lookup_index = _lookup_indices[idx]; + if (lookup_index >= 0) + { + values_id.set(first_dim_pos, lookup_index); + memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes); + hits_buf[lookup_index] = 1; + } + else + { + memset(it.ptr(), 0, copy_bytes); + hits_buf[lookup_index] = 0; + } + }, + it); + + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_lookups)->unmap(queue); + CAST_CL(_keys)->unmap(queue); + CAST_CL(_values)->unmap(queue); + CAST_CL(_output)->unmap(queue); + CAST_CL(_hits)->unmap(queue); + } +} |