diff options
Diffstat (limited to 'runtimes/pure_arm_compute/src/internal')
44 files changed, 50 insertions, 2920 deletions
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc index ff2f79309..1a5c735ee 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc @@ -55,36 +55,16 @@ int new_pv[4] = {0}; ::arm_compute::Coordinates axises = getARMComputeAxises(rank); - if (rank == 4) + for (uint32_t i = 0; i < rank; ++i) { - /** - axises = {3,1,0,2} - NNAPI PermutationVector - N 0 3 - H 1 1 - W 2 0 - C 3 2 - **/ - new_pv[0] = axises[runtime_pv[2]]; - new_pv[1] = axises[runtime_pv[1]]; - new_pv[2] = axises[runtime_pv[3]]; - new_pv[3] = axises[runtime_pv[0]]; - } - else - { - /** - mapping/axises = {rank-1 to 0} - CHW --------> WHC - or - WH ----------> HW - **/ - for (int id = 0; id < rank; ++id) - { - new_pv[id] = axises[runtime_pv[rank - id - 1]]; - } + new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i]).value(); } - return ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]}; + ::arm_compute::PermutationVector ACL_PV = + ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]}; + ACL_PV.set_num_dimensions(rank); + + return ACL_PV; } ::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape, @@ -163,3 +143,10 @@ return ::arm_compute::TensorInfo(shape, 1, asDataType(type), asQuantizationInfo(scale, zeroPoint)); } + +::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, + const ::arm_compute::DataType &type, const float scale, + const int32_t zeroPoint) +{ + return ::arm_compute::TensorInfo(shape, 1, type, asQuantizationInfo(scale, zeroPoint)); +} diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h index 42b547feb..211a6ac87 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h @@ -100,6 +100,18 @@ const float scale = 0.0f, const int32_t zeroPoint = 0); /** + * @brief Cast from internal tensor info to tensor info object of arm compute + * @param[in] shape Tensor shape + * @param[in] type Tensor type of arm compute + * @param[in] scale Scale of tensor quantization + * @param[in] zeroPoint Zeropoint of tensor quantization + * @return TensorInfo object of arm compute + */ +::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, + const ::arm_compute::DataType &type, const float scale, + const int32_t zeroPoint); + +/** * @brief Set value to arm compute tensor with casting * @param[in] value Value to set * @param[out] to Target tensor of arm compute diff --git a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h deleted file mode 100644 index 83ae7c17b..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file FeatureLoggingLayer.h - * @brief This file contains FeatureLoggingLayer class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __FEATURE_LOGGING_LAYER_H__ -#define __FEATURE_LOGGING_LAYER_H__ - -#include <arm_compute/core/ITensor.h> -#include <arm_compute/runtime/IFunction.h> -#include <arm_compute/runtime/CL/CLScheduler.h> - -#include <iostream> -#include <iomanip> -#include <limits> - -#include "internal/arm_compute.h" - -/** - * @brief Class to run FeatureLogging Layer - */ -class FeatureLoggingLayer : public ::arm_compute::IFunction -{ -public: - FeatureLoggingLayer(void) : _tag(""), _target(nullptr) - { - // DO NOTHING - } - -public: - /** - * @brief Configure the layer - * @param[in] tag Text tag for this layer - * @param[in] target The feature tensor to be printed - * @return N/A - */ - void configure(const std::string &tag, ::arm_compute::ITensor *target) - { - _tag = tag; - _target = target; - } - -public: - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run(void) override - { - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - CAST_CL(_target)->map(q); - } - - const size_t W = _target->info()->dimension(0); - const size_t H = _target->info()->dimension(1); - const size_t C = _target->info()->dimension(2); - - std::cout << _tag << std::endl; - - for (size_t ch = 0; ch < C; ++ch) - { - std::cout << "Channel #" << ch << std::endl; - for (size_t row = 0; row < H; ++row) - { - for (size_t col = 0; col < W; ++col) - { - const arm_compute::Coordinates id{col, row, ch}; - const auto value = *reinterpret_cast<float *>(_target->ptr_to_element(id)); - - // TODO Generalize this to integer types - std::cout << std::setprecision(2); - std::cout << std::setw(7); - std::cout << std::setfill(' '); - std::cout << std::fixed; - std::cout << value << " "; - } - std::cout << std::endl; - } - std::cout << std::endl; - } - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - CAST_CL(_target)->unmap(q); - } - } - -private: - std::string _tag; - ::arm_compute::ITensor *_target; -}; - -#endif // __FEATURE_LOGGING_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc deleted file mode 100644 index 28789a801..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenericFullyConnectedLayer.h" -#include "internal/arm_compute.h" - -void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input, - ::arm_compute::ITensor *weights, - ::arm_compute::ITensor *biases, - ::arm_compute::ITensor *output, bool needs_reshape, - ::arm_compute::TensorShape reshape) -{ - _input = input; - _weights = weights; - _biases = biases; - _output = output; - _needs_reshape = needs_reshape; - - // TODO Too many duplicated code. Revise below code. - if (::internal::arm_compute::isGpuMode()) - { - if (_needs_reshape) - { - // reshape - auto_init_if_empty(*_cl_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape)); - _generic_reshape.configure(CAST_CL(_input), &_cl_buffer); - - _cl_fc.configure(&_cl_buffer, CAST_CL(_weights), CAST_CL(_biases), CAST_CL(_output)); - - // NOTE _cl_buffer is inaccessible from outside, and thus it is safe to invoke allocate here. - _cl_buffer.allocator()->allocate(); - } - else - { - _cl_fc.configure(CAST_CL(_input), CAST_CL(_weights), CAST_CL(_biases), CAST_CL(_output)); - } - } - else - { - if (_needs_reshape) - { - // reshape - auto_init_if_empty(*_neon_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape)); - _generic_reshape.configure(_input, &_neon_buffer); - - _neon_fc.configure(&_neon_buffer, _weights, _biases, _output); - - // NOTE _neon_buffer is inaccessible from outside, and thus it is safe to invoke allocate - // here. - _neon_buffer.allocator()->allocate(); - } - else - { - _neon_fc.configure(_input, _weights, _biases, _output); - } - } -} - -void GenericFullyConnectedLayer::run(void) -{ - if (::internal::arm_compute::isGpuMode()) - { - if (_needs_reshape) - _generic_reshape.run(); - - _cl_fc.run(); - } - else - { - if (_needs_reshape) - _generic_reshape.run(); - - _neon_fc.run(); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h deleted file mode 100644 index f1519f54d..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file GenericFullyConnectedLayer.h - * @brief This file contains GenericFullyConnectedLayer class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __GENERIC_FULLY_CONNECTED_LAYER_H__ -#define __GENERIC_FULLY_CONNECTED_LAYER_H__ - -#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h> -#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h> -#include "internal/layers/GenericReshapeLayer.h" - -/** - * @brief Class to run FullyConnected Layer with both CPU and GPU - */ -class GenericFullyConnectedLayer : public ::arm_compute::IFunction -{ -public: - GenericFullyConnectedLayer(void) - : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, - _neon_buffer{}, _cl_fc{}, _neon_fc{}, _generic_reshape{}, _needs_reshape(false) - { - // DO NOTHING - } - -public: - /** - * @brief Configure the layer - * @param[in] input The source tensor - * @param[in] weights The tensor that is filled with weight values - * @param[in] biases The tensor that is filled with biase values - * @param[in] output The destination tensor - * @param[in] needs_reshape Whether it needs to be reshaped or not - * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true. - * @return N/A - */ - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, - ::arm_compute::ITensor *biases, ::arm_compute::ITensor *output, bool needs_reshape, - ::arm_compute::TensorShape reshape); - -public: - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run(void) override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_weights; - ::arm_compute::ITensor *_biases; - ::arm_compute::ITensor *_output; - - // buffer for reshaping input tensor - ::arm_compute::CLTensor _cl_buffer; - ::arm_compute::Tensor _neon_buffer; - -private: - ::arm_compute::CLFullyConnectedLayer _cl_fc; - ::arm_compute::NEFullyConnectedLayer _neon_fc; - GenericReshapeLayer _generic_reshape; - bool _needs_reshape; -}; - -#endif // __GENERIC_FULLY_CONNECTED_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc deleted file mode 100644 index c38c2e9e3..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenericReshapeLayer.h" -#include "internal/arm_compute.h" - -void GenericReshapeLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output) -{ - _input = input; - _output = output; - - // NOTE This vector comes from CLPermuteKernel implementation - // - // This implementation permutes a tensor of shape W / H / C into another tensor of shape C / W / H - // - // Original | Permuted - // 0 | W | C (from 2) - // 1 | H | W (from 0) - // 2 | C | H (from 1) - // - const ::arm_compute::PermutationVector pv{2, 0, 1}; - - if (::internal::arm_compute::isGpuMode()) - { - _cl_permute.configure(CAST_CL(input), &_cl_permuted, pv); - _cl_reshape.configure(&_cl_permuted, CAST_CL(output)); - - // NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here. - _cl_permuted.allocator()->allocate(); - } - else - { - _neon_permute.configure(input, &_neon_permuted, pv); - _neon_reshape.configure(&_neon_permuted, output); - - // NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here. - _neon_permuted.allocator()->allocate(); - } -} - -void GenericReshapeLayer::run(void) -{ - if (::internal::arm_compute::isGpuMode()) - { - _cl_permute.run(); - _cl_reshape.run(); - } - else - { - _neon_permute.run(); - _neon_reshape.run(); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h deleted file mode 100644 index a22c14c8b..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file GenericReshapeLayer.h - * @brief This file contains GenericReshapeLayer class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __GENERIC_RESHAPE_LAYER_H__ -#define __GENERIC_RESHAPE_LAYER_H__ - -#include <arm_compute/runtime/Tensor.h> -#include <arm_compute/runtime/CL/CLTensor.h> - -#include <arm_compute/runtime/CL/functions/CLPermute.h> -#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> -#include <arm_compute/runtime/NEON/functions/NEPermute.h> -#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h> - -/** - * @brief Class to run Reshape Layer with both CPU and GPU - */ -class GenericReshapeLayer : public ::arm_compute::IFunction -{ -public: - GenericReshapeLayer(void) - : _input(nullptr), _output(nullptr), _cl_permuted{}, _neon_permuted{}, _cl_permute{}, - _cl_reshape{}, _neon_permute{}, _neon_reshape{} - { - // DO NOTHING - } - -public: - /** - * @brief Configure the layer - * @param[in] input The source tensor - * @param[in] output The destination tensor - * @return N/A - */ - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); - -public: - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run(void) override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_output; - ::arm_compute::CLTensor _cl_permuted; - ::arm_compute::Tensor _neon_permuted; - -private: - ::arm_compute::CLPermute _cl_permute; - ::arm_compute::CLReshapeLayer _cl_reshape; - - ::arm_compute::NEPermute _neon_permute; - ::arm_compute::NEReshapeLayer _neon_reshape; -}; - -#endif // __GENERIC_RESHAPE_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc deleted file mode 100644 index 6d348e814..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleArgMinMax.h" -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleArgMinMax::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - std::vector<uint32_t> axis, ::arm_compute::ArgOperation op) -{ - _input = input; - _output = output; - _axis = axis; - _input_rank = input->info()->num_dimensions(); - _op_type = op; -} - -inline const ::arm_compute::TensorShape -inferOutputShape(const ::arm_compute::TensorShape &input_shape, const std::vector<uint32_t> &axis, - int input_rank) -{ - ::arm_compute::TensorShape out_shape{}; - size_t dim = 1; - for (int i = 0; i < input_rank; ++i) - { - dim = input_shape[i]; - out_shape.set(i, dim); - } - - for (int i = 0; i < axis.size(); ++i) - { - out_shape.set(axis[i], 1); - } - - return out_shape; -} - -template <typename T> -inline T getArgMinMaxEle(const ::arm_compute::ITensor *input, - const ::arm_compute::TensorShape &input_shape, - const ::arm_compute::TensorShape &output_shape, const size_t b, - const size_t d, const size_t h, const size_t w, const int axis, - const ::arm_compute::ArgOperation op_type) -{ - // If output[dimention] == 1, will check all values of that dimension because of reducing - // dimension. - // Else will check only one value. - const size_t start_b = output_shape[3] == 1 ? 0 : b; - const size_t start_d = output_shape[2] == 1 ? 0 : d; - const size_t start_h = output_shape[1] == 1 ? 0 : h; - const size_t start_w = output_shape[0] == 1 ? 0 : w; - const size_t stop_b = output_shape[3] == 1 ? input_shape[3] - 1 : b; - const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d; - const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h; - const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w; - - ::arm_compute::Coordinates id{w, h, d, b}; - ::arm_compute::Coordinates min_max_id{w, h, d, b}; - - T value = *reinterpret_cast<T *>(input->ptr_to_element(id)); - T tval = *reinterpret_cast<T *>(input->ptr_to_element(id)); - - for (size_t in_b = start_b; in_b <= stop_b; ++in_b) - { - id.set(3, in_b); - for (size_t in_d = start_d; in_d <= stop_d; ++in_d) - { - id.set(2, in_d); - for (size_t in_h = start_h; in_h <= stop_h; ++in_h) - { - id.set(1, in_h); - for (size_t in_w = start_w; in_w <= stop_w; ++in_w) - { - id.set(0, in_w); - if (op_type == ::arm_compute::ArgOperation::MIN) - { - value = std::min<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id))); - } - else if (op_type == ::arm_compute::ArgOperation::MAX) - { - value = std::max<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id))); - } - else - throw std::runtime_error("This Arg operation is not supported, yet"); - - if (tval != value) - { - min_max_id = id; - tval = value; - } - } - } - } - } - - return min_max_id[axis]; -} - -template <typename T> -inline void -getArgMinMax(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape, - const ::arm_compute::TensorShape &output_shape, ::arm_compute::ITensor *output, - const int axis, const ::arm_compute::ArgOperation op_type) -{ - ::arm_compute::Coordinates id; - for (size_t out_b = 0; out_b < output_shape[3]; ++out_b) - { - id.set(3, out_b); - for (size_t out_d = 0; out_d < output_shape[2]; ++out_d) - { - id.set(2, out_d); - for (size_t out_h = 0; out_h < output_shape[1]; ++out_h) - { - id.set(1, out_h); - for (size_t out_w = 0; out_w < output_shape[0]; ++out_w) - { - id.set(0, out_w); - *reinterpret_cast<int *>(output->ptr_to_element(id)) = getArgMinMaxEle<T>( - input, input_shape, output_shape, out_b, out_d, out_h, out_w, axis, op_type); - } - } - } - } -} - -void SimpleArgMinMax::run() -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->map(q); - CAST_CL(_output)->map(q); - } - - ::arm_compute::TensorShape input_shape = _input->info()->tensor_shape(); - - // Axis dimension is 1 and size is 1. - // TODO support axis size > 1. - int axis_val = _axis[0]; - ::arm_compute::TensorShape output_shape = inferOutputShape(input_shape, _axis, _input_rank); - - _output->info()->set_tensor_shape(output_shape); - switch (_input->info()->data_type()) - { - case ::arm_compute::DataType::QASYMM8: - getArgMinMax<uint8_t>(_input, input_shape, output_shape, _output, axis_val, _op_type); - break; - case ::arm_compute::DataType::S32: - getArgMinMax<int32_t>(_input, input_shape, output_shape, _output, axis_val, _op_type); - break; - case ::arm_compute::DataType::F32: - getArgMinMax<float>(_input, input_shape, output_shape, _output, axis_val, _op_type); - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - break; - } - - _output->info()->set_tensor_shape(output_shape); - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->unmap(q); - CAST_CL(_output)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h deleted file mode 100644 index b90e74579..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SIMPLE_ARG_MIN_MAX_H__ -#define __SIMPLE_ARG_MIN_MAX_H__ - -#include "internal/arm_compute.h" -#include "arm_compute/core/TypesEx.h" - -class SimpleArgMinMax : public ::arm_compute::IFunction -{ -public: - SimpleArgMinMax(void) : _input(nullptr), _output(nullptr), _axis(), _input_rank(0) - { - // DO NOTHING - } - -public: - /** Initialise input and output - * - * @param[in] input First tensor input. - * @param[out] output Output tensor. - * @param[in] axis Dimension along which to find Min or Max Index. - */ - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - std::vector<uint32_t> axis, ::arm_compute::ArgOperation _op_type); - - void run() override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_output; - std::vector<uint32_t> _axis; - int _input_rank; - ::arm_compute::ArgOperation _op_type; -}; - -#endif /*__SIMPLE_ARG_MIN_MAX_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h deleted file mode 100644 index aed9ae286..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file SimpleArithmeticAddition.h - * @brief This file contains SimpleArithmeticAddition class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __SIMPLE_ARITHMETIC_ADDITION_H__ -#define __SIMPLE_ARITHMETIC_ADDITION_H__ - -#include "internal/arm_compute.h" -#include <arm_compute/core/ITensor.h> - -/** - * @brief Class to run SimpleArithmeticAddition Layer - */ -class SimpleArithmeticAddition : public ::arm_compute::IFunction -{ -public: - SimpleArithmeticAddition(void) : _lhs(nullptr), _rhs(nullptr), _out(nullptr) - { - // DO NOTHING - } - - /** - * @brief Configure the layer - * @param[in] lhs Lefthand-side operand - * @param[in] rhs Righthand-side operand - * @param[in] out The destination tensor(Result operand) - * @return N/A - */ - void configure(::arm_compute::ITensor *lhs, ::arm_compute::ITensor *rhs, - ::arm_compute::ITensor *out) - { - _lhs = lhs; - _rhs = rhs; - _out = out; - } - -public: - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run(void) override - { - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_lhs)->map(q); - CAST_CL(_rhs)->map(q); - CAST_CL(_out)->map(q); - } - - arm_compute::Window window; - window.use_tensor_dimensions(_out->info()->tensor_shape()); - - execute_window_loop(window, [this](const arm_compute::Coordinates &id) { - // NOTE Must be two input tensors of identical type - // Must be output tensor of the same type as input0. - assert(_lhs->info()->data_type() == _rhs->info()->data_type()); - assert(_lhs->info()->data_type() == _out->info()->data_type()); - - switch (_lhs->info()->data_type()) - { - case ::arm_compute::DataType::F32: - { - const auto lhs_value = *reinterpret_cast<float *>(_lhs->ptr_to_element(id)); - const auto rhs_value = *reinterpret_cast<float *>(_rhs->ptr_to_element(id)); - *reinterpret_cast<float *>(_out->ptr_to_element(id)) = lhs_value + rhs_value; - break; - } - case ::arm_compute::DataType::S32: - { - const auto lhs_value = *reinterpret_cast<int32_t *>(_lhs->ptr_to_element(id)); - const auto rhs_value = *reinterpret_cast<int32_t *>(_rhs->ptr_to_element(id)); - *reinterpret_cast<int32_t *>(_out->ptr_to_element(id)) = lhs_value + rhs_value; - break; - } - case ::arm_compute::DataType::U32: - { - const auto lhs_value = *reinterpret_cast<uint32_t *>(_lhs->ptr_to_element(id)); - const auto rhs_value = *reinterpret_cast<uint32_t *>(_rhs->ptr_to_element(id)); - *reinterpret_cast<uint32_t *>(_out->ptr_to_element(id)) = lhs_value + rhs_value; - break; - } - case ::arm_compute::DataType::QASYMM8: - { - const auto lhs_value = *reinterpret_cast<uint8_t *>(_lhs->ptr_to_element(id)); - const auto rhs_value = *reinterpret_cast<uint8_t *>(_rhs->ptr_to_element(id)); - // How to handle with overflow? - *reinterpret_cast<uint8_t *>(_out->ptr_to_element(id)) = lhs_value + rhs_value; - break; - } - default: - throw std::runtime_error("Not supported, yet"); - break; - } - }); - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_out)->unmap(q); - CAST_CL(_rhs)->unmap(q); - CAST_CL(_lhs)->unmap(q); - } - } - -private: - ::arm_compute::ITensor *_lhs; - ::arm_compute::ITensor *_rhs; - ::arm_compute::ITensor *_out; -}; - -#endif // __SIMPLE_ARITHMETIC_ADDITION_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc deleted file mode 100644 index 87175ee1a..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleBatchToSpaceNd.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleBatchToSpaceND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - const int32_t *block_size, - const ::arm_compute::Coordinates &axises) -{ - const auto rank = axises.num_dimensions(); - assert(rank == 4); - - for (int i = 0; i < rank; ++i) - assert(axises[i] >= 0 && axises[i] < rank); - - _input = input; - _output = output; - _block_size = block_size; - _axises = axises; -} - -template <typename T> -inline void BatchToSpaceND(const ::arm_compute::ITensor *input, - const ::arm_compute::TensorShape &input_shape, - const int32_t *block_size_data, ::arm_compute::ITensor *output, - const ::arm_compute::TensorShape &output_shape, - const ::arm_compute::Coordinates &axises) -{ - const int output_batch = output_shape[axises[0]]; - const int output_height = output_shape[axises[1]]; - const int output_width = output_shape[axises[2]]; - const int depth = output_shape[axises[3]]; - - for (int out_b = 0; out_b < output_batch; ++out_b) - { - for (int out_h = 0; out_h < output_height; ++out_h) - { - for (int out_w = 0; out_w < output_width; ++out_w) - { - for (int out_d = 0; out_d < depth; ++out_d) - { - const int in_d = out_d; - const int in_h = out_h / block_size_data[0]; - const int in_w = out_w / block_size_data[1]; - const int in_b = - out_b + - ((out_h % block_size_data[0]) * block_size_data[1] + out_w % block_size_data[1]) * - output_batch; - - auto input_id = - asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); - auto output_id = asARMComputeCoordinates( - ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); - - *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = - *reinterpret_cast<T *>(input->ptr_to_element(input_id)); - } - } - } - } -} -void SimpleBatchToSpaceND::run() -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->map(q); - CAST_CL(_output)->map(q); - } - - switch (_input->info()->data_type()) - { - case ::arm_compute::DataType::U8: - case ::arm_compute::DataType::QASYMM8: - BatchToSpaceND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::F32: - BatchToSpaceND<float>(_input, _input->info()->tensor_shape(), _block_size, _output, - _output->info()->tensor_shape(), _axises); - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - break; - } - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->unmap(q); - CAST_CL(_output)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h deleted file mode 100644 index 5695d9719..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - *Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SIMPLE_BATCH_TO_SPACE_ND_H__ -#define __SIMPLE_BATCH_TO_SPACE_ND_H__ - -#include "internal/arm_compute.h" -#include "internal/arm_compute/Cast.h" - -class SimpleBatchToSpaceND : public ::arm_compute::IFunction -{ -public: - SimpleBatchToSpaceND(void) : _input(nullptr), _output(nullptr), _block_size(nullptr), _axises{} - { - // DO NOTHING - } - - /** Initialise input and output - * - * @param[in] input First tensor input. - * @param[out] output Output tensor. - * @param[in] block_size Block size. - * @param[in] axises Axises of rank 4 - */ - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - const int32_t *block_size, - const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); - - void run() override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_output; - const int32_t *_block_size; - ::arm_compute::Coordinates _axises; -}; - -#endif /*__SIMPLE_BATCH_TO_SPACE_ND_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc deleted file mode 100644 index 7c7706a78..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "internal/layers/SimpleCastLayer.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleCastLayer::castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out, - const arm_compute::Coordinates &id) -{ - switch (in->info()->data_type()) - { - case ::arm_compute::DataType::F32: - { - copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::S32: - { - copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::U32: - { - copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::QASYMM8: - { - const uint8_t quantizedValue = *(in->ptr_to_element(id)); - copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id); - break; - } - default: - throw std::runtime_error("Not supported, yet"); - break; - } -} - -void SimpleCastLayer::configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out) -{ - _in = in; - _out = out; -} - -void SimpleCastLayer::run(void) -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - CAST_CL(_in)->map(q); - CAST_CL(_out)->map(q); - } - - arm_compute::Window window; - window.use_tensor_dimensions(_out->info()->tensor_shape()); - - execute_window_loop(window, - [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); }); - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - CAST_CL(_out)->unmap(q); - CAST_CL(_in)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h deleted file mode 100644 index f9a48b481..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file SimpleCastLayer.h - * @brief This file contains SimpleCastLayer class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __SIMPLE_CAST_LAYER_H__ -#define __SIMPLE_CAST_LAYER_H__ - -#include "internal/arm_compute.h" -#include "internal/arm_compute/Cast.h" - -/** - * @brief Class to run SimpleCast Layer - */ -class SimpleCastLayer : public ::arm_compute::IFunction -{ -public: - SimpleCastLayer(void) : _in(nullptr), _out(nullptr) - { - // DO NOTHING - } - - /** - * @brief Configure the layer - * @param[in] in The source tensor - * @param[in] out The destination tensor - * @return N/A - */ - void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out); - - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run(void) override; - -private: - /** - * @brief Cast and copy data from one tensor to another - * - * @param[in] in The source tensor - * @param[out] out The destination tensor - * @param[in] id Coordinates to copy - * @return N/A - */ - void castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out, - const arm_compute::Coordinates &id); - - ::arm_compute::ITensor *_in; - ::arm_compute::ITensor *_out; -}; - -#endif // __SIMPLE_CAST_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc deleted file mode 100644 index d62a8321b..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleDepthToSpace.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - int32_t block_size, const ::arm_compute::Coordinates &axises) -{ - const auto rank = axises.num_dimensions(); - assert(rank == 4); - for (int i = 0; i < rank; ++i) - { - assert(axises[i] >= 0); - assert(axises[i] < rank); - } - - _input = input; - _output = output; - _block_size = block_size; - _axises = axises; -} - -template <typename T> -inline void DepthToSpace(const ::arm_compute::ITensor *input, - const ::arm_compute::TensorShape &input_shape, int32_t block_size, - ::arm_compute::ITensor *output, - const ::arm_compute::TensorShape &output_shape, - const ::arm_compute::Coordinates &axises) -{ - const int output_batch = output_shape[axises[0]]; - const int output_height = output_shape[axises[1]]; - const int output_width = output_shape[axises[2]]; - const int output_depth = output_shape[axises[3]]; - - for (int out_b = 0; out_b < output_batch; ++out_b) - { - for (int out_h = 0; out_h < output_height; ++out_h) - { - for (int out_w = 0; out_w < output_width; ++out_w) - { - for (int out_d = 0; out_d < output_depth; ++out_d) - { - const int in_b = out_b; - const int in_h = out_h / block_size; - const int in_w = out_w / block_size; - const int in_d = - out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth; - - auto input_id = - asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); - auto output_id = asARMComputeCoordinates( - ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); - - *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = - *reinterpret_cast<T *>(input->ptr_to_element(input_id)); - } - } - } - } -} - -void SimpleDepthToSpace::run() -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->map(q); - CAST_CL(_output)->map(q); - } - - switch (_input->info()->data_type()) - { - case ::arm_compute::DataType::U8: - case ::arm_compute::DataType::QASYMM8: - DepthToSpace<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::F32: - DepthToSpace<float>(_input, _input->info()->tensor_shape(), _block_size, _output, - _output->info()->tensor_shape(), _axises); - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - break; - } - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->unmap(q); - CAST_CL(_output)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h deleted file mode 100644 index 1032aaa47..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SIMPLE_DEPTH_TO_SPACE_H__ -#define __SIMPLE_DEPTH_TO_SPACE_H__ - -#include "internal/arm_compute.h" -#include "internal/arm_compute/Cast.h" - -class SimpleDepthToSpace : public ::arm_compute::IFunction -{ -public: - SimpleDepthToSpace(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{} - { - // DO NOTHING - } - -public: - /** Initialise input and output - * - * @param[in] input First tensor input. - * @param[out] output Output tensor. - * @param[in] block_size Block size. - * @param[in] axises Axises of rank 4 - */ - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size, - const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); - - void run() override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_output; - int32_t _block_size; - ::arm_compute::Coordinates _axises; -}; - -#endif /*__SIMPLE_DEPTH_TO_SPACE_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc deleted file mode 100644 index ae740bb10..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "internal/layers/SimpleEmbeddingLookup.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleEmbeddingLookup::configure(::arm_compute::ITensor *lookups, - ::arm_compute::ITensor *values, - ::arm_compute::ITensor *output) -{ - assert(values->info()->num_dimensions() == output->info()->num_dimensions()); - assert(values->info()->num_dimensions() > 1 && values->info()->num_dimensions() <= 4); - _lookups = lookups; - _values = values; - _output = output; -} - -void SimpleEmbeddingLookup::run() -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_lookups)->map(q); - CAST_CL(_values)->map(q); - CAST_CL(_output)->map(q); - } - - // type of elements of lookups is always integer - const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer()); - - const auto lookups_info = _lookups->info(); - const auto values_info = _values->info(); - const auto output_info = _output->info(); - - // NOTE The first dimension's position is always at the end of dimensions. - const auto first_dim_pos = values_info->num_dimensions() - 1; - - const size_t first_dim = values_info->dimension(first_dim_pos); - for (size_t i = 0; i < lookups_info->dimension(0); ++i) - { - if (lookups_buf[i] < 0 || lookups_buf[i] >= first_dim) - throw std::runtime_error("Embedding Lookup: index out of bounds."); - } - - // If each strides of values and output are different, applied padding size of the two tensors are - // different, therefore, it can not be copied at once. - auto can_copy_at_once = [&]() -> bool { - const auto &values_strides = values_info->strides_in_bytes(); - const auto &output_strides = output_info->strides_in_bytes(); - - for (size_t i = 0; i < first_dim_pos; ++i) - { - if (values_strides[i] != values_strides[i]) - return false; - } - - return true; - }; - - using ::arm_compute::Window; - using ::arm_compute::Iterator; - - size_t copy_bytes; - Window window; - if (can_copy_at_once()) - { - copy_bytes = values_info->total_size() / first_dim; - window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos); - } - else - { - copy_bytes = values_info->dimension(0) * values_info->element_size(); - window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY); - } - - Iterator it(_output, window); - execute_window_loop(window, - [&](const ::arm_compute::Coordinates &id) { - ::arm_compute::Coordinates values_id = id; - const int idx = id[first_dim_pos]; - values_id.set(first_dim_pos, lookups_buf[idx]); - memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes); - }, - it); - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_lookups)->unmap(q); - CAST_CL(_values)->unmap(q); - CAST_CL(_output)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h deleted file mode 100644 index fd499437f..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __SIMPLE_EMBEDDING_LOOKUP_H__ -#define __SIMPLE_EMBEDDING_LOOKUP_H__ - -#include "internal/arm_compute.h" - -/** - * @file SimpleEmbeddingLookup.h - * @brief This file contains SimpleEmbeddingLookup class - * @ingroup COM_AI_RUNTIME - */ - -/** - * @brief Class to run SimpleEmbeddingLookup Layer - */ -class SimpleEmbeddingLookup : public ::arm_compute::IFunction -{ -public: - SimpleEmbeddingLookup(void) : _lookups(nullptr), _values(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - /** - * @brief Configure the layer - * @param[in] lookups 1D tensor which contains lookup values - * @param[in] values The source tensor - * @param[in] output The destination tensor - * @return N/A - */ - void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *values, - ::arm_compute::ITensor *output); - - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run() override; - -private: - ::arm_compute::ITensor *_lookups; - ::arm_compute::ITensor *_values; - ::arm_compute::ITensor *_output; -}; - -#endif /*__SIMPLE_EMBEDDING_LOOKUP_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc deleted file mode 100644 index 7f8ae2505..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleHashtableLookupLayer.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleHashtableLookupLayer::configure(::arm_compute::ITensor *lookups, - ::arm_compute::ITensor *keys, - ::arm_compute::ITensor *values, - ::arm_compute::ITensor *output, - ::arm_compute::ITensor *hits) -{ - _lookups = lookups; - _keys = keys; - _values = values; - _output = output; - _hits = hits; - _lookup_indices.resize(lookups->info()->dimension(0), -1); -} - -void SimpleHashtableLookupLayer::run() -{ - auto &queue = ::arm_compute::CLScheduler::get().queue(); - if (::internal::arm_compute::isGpuMode()) - { - CAST_CL(_lookups)->map(queue); - CAST_CL(_keys)->map(queue); - CAST_CL(_values)->map(queue); - CAST_CL(_output)->map(queue); - CAST_CL(_hits)->map(queue); - } - - const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer()); - const int32_t *keys_buf = reinterpret_cast<int32_t *>(_keys->buffer()); - uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer()); - - const auto lookups_info = _lookups->info(); - const auto values_info = _values->info(); - const auto keys_info = _keys->info(); - const auto output_info = _output->info(); - - // NOTE The first dimension's position must be always at the end of dimensions. - const auto first_dim_pos = values_info->num_dimensions() - 1; - const size_t first_dim = values_info->dimension(first_dim_pos); - - std::map<int32_t, size_t> key_map; - const int keys_num = keys_info->dimension(0); - for (size_t key_index = 0; key_index < keys_num; key_index++) - { - key_map[keys_buf[key_index]] = key_index; - } - - const int lookups_num = lookups_info->dimension(0); - for (size_t i = 0; i < lookups_num; ++i) - { - const auto lookup_value = lookups_buf[i]; - const auto it = key_map.find(lookup_value); - if (it != key_map.end()) - { - if (it->second >= first_dim) - throw std::runtime_error("HashTable Lookup: index out of bounds."); - _lookup_indices[i] = it->second; - } - } - - // If each strides of values and output are different, applied padding size of the two tensors are - // different, therefore, it can not be copied at once. - auto can_copy_at_once = [&]() -> bool { - const auto &values_strides = values_info->strides_in_bytes(); - const auto &output_strides = output_info->strides_in_bytes(); - - for (size_t i = 0; i < first_dim_pos; ++i) - { - if (values_strides[i] != values_strides[i]) - return false; - } - - return true; - }; - - using ::arm_compute::Window; - using ::arm_compute::Iterator; - using ::arm_compute::Coordinates; - - size_t copy_bytes; - Window window; - if (can_copy_at_once()) - { - copy_bytes = values_info->total_size() / first_dim; - window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos); - } - else - { - copy_bytes = values_info->dimension(0) * values_info->element_size(); - window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY); - } - - Iterator it(_output, window); - execute_window_loop(window, - [&](const Coordinates &id) { - Coordinates values_id = id; - const int idx = id[first_dim_pos]; - const int lookup_index = _lookup_indices[idx]; - if (lookup_index >= 0) - { - values_id.set(first_dim_pos, lookup_index); - memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes); - hits_buf[lookup_index] = 1; - } - else - { - memset(it.ptr(), 0, copy_bytes); - hits_buf[lookup_index] = 0; - } - }, - it); - - if (::internal::arm_compute::isGpuMode()) - { - CAST_CL(_lookups)->unmap(queue); - CAST_CL(_keys)->unmap(queue); - CAST_CL(_values)->unmap(queue); - CAST_CL(_output)->unmap(queue); - CAST_CL(_hits)->unmap(queue); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h deleted file mode 100644 index ba9d2ec0d..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SIMPLE_HASHTABLE_LOOKUP_H__ -#define __SIMPLE_HASHTABLE_LOOKUP_H__ - -#include "internal/arm_compute.h" - -class SimpleHashtableLookupLayer : public ::arm_compute::IFunction -{ -public: - SimpleHashtableLookupLayer(void) - : _lookups(nullptr), _keys(nullptr), _values(nullptr), _output(nullptr), _hits(nullptr) - { - // DO NOTHING - } - - void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *keys, - ::arm_compute::ITensor *values, ::arm_compute::ITensor *output, - ::arm_compute::ITensor *hits); - - void run() override; - -private: - ::arm_compute::ITensor *_lookups; - ::arm_compute::ITensor *_keys; - ::arm_compute::ITensor *_values; - ::arm_compute::ITensor *_output; - ::arm_compute::ITensor *_hits; - std::vector<int32_t> _lookup_indices; -}; - -#endif /*__SIMPLE_HASHTABLE_LOOKUP_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc deleted file mode 100644 index d3943ad40..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleNeg.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleNeg::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output) -{ - _input = input; - _output = output; -} - -void SimpleNeg::run() -{ - auto &queue = ::arm_compute::CLScheduler::get().queue(); - if (::internal::arm_compute::isGpuMode()) - { - CAST_CL(_input)->map(queue); - CAST_CL(_output)->map(queue); - } - - arm_compute::Window window; - window.use_tensor_dimensions(_output->info()->tensor_shape()); - - execute_window_loop(window, [this](const arm_compute::Coordinates &id) { - // NOTE Must be two input tensors of identical type - // Must be output tensor of the same type as input0. - assert(_input->info()->data_type() == _output->info()->data_type()); - - switch (_input->info()->data_type()) - { - case ::arm_compute::DataType::F32: - { - const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id)); - *reinterpret_cast<float *>(_output->ptr_to_element(id)) = -input_value; - break; - } - case ::arm_compute::DataType::S32: - { - const auto input_value = *reinterpret_cast<int32_t *>(_input->ptr_to_element(id)); - *reinterpret_cast<int32_t *>(_output->ptr_to_element(id)) = -input_value; - break; - } - case ::arm_compute::DataType::U32: - { - const auto input_value = *reinterpret_cast<uint32_t *>(_input->ptr_to_element(id)); - *reinterpret_cast<uint32_t *>(_output->ptr_to_element(id)) = -input_value; - break; - } - default: - throw std::runtime_error("Not supported, yet"); - break; - } - }); - - if (::internal::arm_compute::isGpuMode()) - { - CAST_CL(_input)->unmap(queue); - CAST_CL(_output)->unmap(queue); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h deleted file mode 100644 index 4ca88e7f8..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SIMPLE_NEG_H__ -#define __SIMPLE_NEG_H__ - -#include "internal/arm_compute.h" - -class SimpleNeg : public ::arm_compute::IFunction -{ -public: - SimpleNeg(void) : _input(nullptr), _output(nullptr) - { - // DO NOTHING - } - - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); - - void run() override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_output; -}; - -#endif /*__SIMPLE_NEG_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc deleted file mode 100644 index 2a0a25f0c..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "internal/arm_compute.h" -#include "SimplePackLayer.h" - -void SimplePackLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_vector, - ::arm_compute::ICLTensor *output, int32_t axis) -{ - uint32_t nr_inputs = input_vector.size(); - uint32_t output_rank = output->info()->num_dimensions(); - const ::arm_compute::PermutationVector pv{1, 2, 0}; - _cl_permuted_vector.resize(nr_inputs); - _cl_permute_vector.resize(nr_inputs); - - _output = output; - // A negative axis implies axis from the end. - // For example, axis = -1 implies the first axis from the end, i.e. axis = Rank - 1. - // Similarly, axis = -2 imples second axis from the end, i.e. axis = Rank - 2. - if (axis < 0) - { - axis += output_rank; - } - _axis = ToARMComputeAxis(output_rank, axis).value(); - _cl_reshape_vector.resize(nr_inputs); - - ::arm_compute::TensorShape subTensor_shape{}; - for (int i = 0; i < output_rank; i++) - { - if (i != _axis) - { - subTensor_shape.set(i, _output->info()->tensor_shape()[i]); - } - else - { - subTensor_shape.set(i, 1); - } - } - - auto subTensor_offset = ::arm_compute::Coordinates{}; - subTensor_offset.set_num_dimensions(output_rank); - - for (int i = 0; i < input_vector.size(); i++) - { - _input_vector.push_back(input_vector[i]); - subTensor_offset[_axis] = i; - auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>( - CAST_CL(_output), subTensor_shape, subTensor_offset, true); - _sub_tensor_vector.push_back(temp_tensor); - // configure to resize of input tensor in sub tensor offseted, dimension expansion will be - // automatic - _cl_permute_vector[i].configure(CAST_CL(_input_vector[i]), &_cl_permuted_vector[i], pv); - _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], _sub_tensor_vector[i].get()); - _cl_permuted_vector[i].allocator()->allocate(); - } -} - -void SimplePackLayer::run(void) -{ - for (int i = 0; i < _input_vector.size(); i++) - { - _cl_permute_vector[i].run(); - _cl_reshape_vector[i].run(); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h deleted file mode 100644 index 2c2fc37f2..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __SIMPLE_PACK_LAYER_H__ -#define __SIMPLE_PACK_LAYER_H__ - -#include <arm_compute/runtime/CL/CLTensor.h> -#include <arm_compute/runtime/CL/CLSubTensor.h> -#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> -#include <arm_compute/runtime/CL/functions/CLPermute.h> - -class SimplePackLayer : public ::arm_compute::IFunction -{ -public: - SimplePackLayer(void) - : _cl_permuted_vector{}, _input_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{}, - _cl_permute_vector{}, _output(nullptr), _axis(0) - { - // DO NOTHING - } - -public: - void configure(const std::vector<::arm_compute::ICLTensor *> &input_vector, - ::arm_compute::ICLTensor *output, int axis); - -public: - void run(void) override; - -private: - std::vector<::arm_compute::CLTensor> _cl_permuted_vector; - std::vector<::arm_compute::ICLTensor *> _input_vector; - std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector; - std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector; - std::vector<::arm_compute::CLPermute> _cl_permute_vector; - ::arm_compute::ICLTensor *_output; - int _axis; -}; - -#endif // __SIMPLE_PACK_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc deleted file mode 100644 index 64236603f..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimplePadLayer.h" -#include <arm_compute/runtime/CL/CLScheduler.h> - -namespace -{ -bool validate_arg(const ::arm_compute::ITensor *input, const ::arm_compute::ITensor *output, - const ::arm_compute::ITensor *padding_size, - const ::arm_compute::Coordinates &axises) -{ - const int input_batch = input->info()->tensor_shape()[axises[0]]; - const int input_height = input->info()->tensor_shape()[axises[1]]; - const int input_width = input->info()->tensor_shape()[axises[2]]; - const int input_depth = input->info()->tensor_shape()[axises[3]]; - - const int output_batch = output->info()->tensor_shape()[axises[0]]; - const int output_height = output->info()->tensor_shape()[axises[1]]; - const int output_width = output->info()->tensor_shape()[axises[2]]; - const int output_depth = output->info()->tensor_shape()[axises[3]]; - - auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0})); - auto pad_batch_down = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 0})); - auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1})); - auto pad_height_bottom = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 1})); - auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2})); - auto pad_width_right = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 2})); - auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3})); - auto pad_depth_back = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 3})); - - const int padded_batch = input_batch + pad_batch_up + pad_batch_down; - const int padded_height = input_height + pad_height_top + pad_height_bottom; - const int padded_width = input_width + pad_width_left + pad_width_right; - const int padded_depth = input_depth + pad_depth_front + pad_depth_back; - - return (padded_batch == output_batch) && (padded_height == output_height) && - (padded_width == output_width) && (padded_depth == output_depth); -} -} // namespace - -void SimplePadLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - ::arm_compute::ITensor *padding_size, - const ::arm_compute::Coordinates &axises) -{ - - const auto rank = axises.num_dimensions(); - assert(rank == 4); - assert(input != nullptr && output != nullptr && padding_size != nullptr); - - for (int i = 0; i < rank; ++i) - { - assert(axises[i] >= 0); - assert(axises[i] < rank); - } - - _input = input; - _output = output; - _padding_size = padding_size; - _axises = axises; -} - -template <typename T> -inline void ApplyPadding(const ::arm_compute::ITensor *input_data, - const ::arm_compute::TensorShape &input_shape, - const ::arm_compute::ITensor *padding_size, - ::arm_compute::ITensor *output_data, - const ::arm_compute::TensorShape &output_shape, - const ::arm_compute::Coordinates &axises, T zero_value) -{ - - assert(validate_arg(input_data, output_data, padding_size, axises) && - "Padded Input shape does not match to output shape"); - - const int input_batch = input_shape[axises[0]]; - const int input_height = input_shape[axises[1]]; - const int input_width = input_shape[axises[2]]; - const int input_depth = input_shape[axises[3]]; - - const int output_batch = output_shape[axises[0]]; - const int output_height = output_shape[axises[1]]; - const int output_width = output_shape[axises[2]]; - const int output_depth = output_shape[axises[3]]; - - // Padding size for Up, Top, Left and Front are required. - auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0})); - auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1})); - auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2})); - auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3})); - - for (int out_b = 0; out_b < output_batch; ++out_b) - { - for (int out_h = 0; out_h < output_height; ++out_h) - { - for (int out_w = 0; out_w < output_width; ++out_w) - { - for (int out_d = 0; out_d < output_depth; ++out_d) - { - auto output_id = asARMComputeCoordinates( - ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); - - if (out_b < pad_batch_up || out_b >= (input_batch + pad_batch_up) || - out_h < pad_height_top || out_h >= (input_height + pad_height_top) || - out_w < pad_width_left || out_w >= (input_width + pad_width_left) || - out_d < pad_depth_front || out_d >= (input_depth + pad_depth_front)) - { - *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = zero_value; - } - else - { - auto input_id = asARMComputeCoordinates( - ::arm_compute::Coordinates{out_b - pad_batch_up, out_h - pad_height_top, - out_w - pad_width_left, out_d - pad_depth_front}, - axises); - *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = - *reinterpret_cast<T *>(input_data->ptr_to_element(input_id)); - } - } - } - } - } -} -void SimplePadLayer::run() -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->map(q); - CAST_CL(_output)->map(q); - CAST_CL(_padding_size)->map(q); - } - - switch (_input->info()->data_type()) - { - case ::arm_compute::DataType::U8: - case ::arm_compute::DataType::QASYMM8: - ApplyPadding<uint8_t>(_input, _input->info()->tensor_shape(), _padding_size, _output, - _output->info()->tensor_shape(), _axises, - _input->info()->quantization_info().offset); - break; - case ::arm_compute::DataType::F32: - ApplyPadding<float>(_input, _input->info()->tensor_shape(), _padding_size, _output, - _output->info()->tensor_shape(), _axises, 0.0f); - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - break; - } - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->unmap(q); - CAST_CL(_output)->unmap(q); - CAST_CL(_padding_size)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h deleted file mode 100644 index 8cb6659ce..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SIMPLE_PAD_LAYER_H__ -#define __SIMPLE_PAD_LAYER_H__ - -#include "internal/arm_compute.h" -#include "internal/arm_compute/Cast.h" - -class SimplePadLayer : public ::arm_compute::IFunction -{ -public: - SimplePadLayer(void) : _input(nullptr), _output(nullptr), _padding_size(nullptr), _axises{} - { - // DO NOTHING - } - - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - ::arm_compute::ITensor *padding_size, - const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); - - void run(void) override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_output; - ::arm_compute::ITensor *_padding_size; - ::arm_compute::Coordinates _axises; -}; - -#endif // __SIMPLE_PAD_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc deleted file mode 100644 index b5b3a0950..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleSQRT.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleSQRT::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output) -{ - _input = input; - _output = output; -} - -void SimpleSQRT::run() -{ - auto &queue = ::arm_compute::CLScheduler::get().queue(); - if (::internal::arm_compute::isGpuMode()) - { - CAST_CL(_input)->map(queue); - CAST_CL(_output)->map(queue); - } - - arm_compute::Window window; - window.use_tensor_dimensions(_output->info()->tensor_shape()); - - execute_window_loop(window, [this](const arm_compute::Coordinates &id) { - // NOTE Must be two input tensors of identical type - // Must be output tensor of the same type as input0. - assert(_input->info()->data_type() == _output->info()->data_type()); - - const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id)); - *reinterpret_cast<float *>(_output->ptr_to_element(id)) = sqrt(input_value); - }); - - if (::internal::arm_compute::isGpuMode()) - { - CAST_CL(_input)->unmap(queue); - CAST_CL(_output)->unmap(queue); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h deleted file mode 100644 index b05a9e32e..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SIMPLE_SQRT_H__ -#define __SIMPLE_SQRT_H__ - -#include "internal/arm_compute.h" - -class SimpleSQRT : public ::arm_compute::IFunction -{ -public: - SimpleSQRT(void) : _input(nullptr), _output(nullptr) - { - // DO NOTHING - } - - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); - - void run() override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_output; -}; - -#endif /*__SIMPLE_SQRT_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc deleted file mode 100644 index f53675b99..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleSpaceToBatchND.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, - ::arm_compute::ITensor *block_size, - ::arm_compute::ITensor *padding_size, - ::arm_compute::ITensor *output) -{ - const auto rank = input->info()->num_dimensions(); - assert(rank == 4); - - _input = input; - _block_size = block_size; - _padding_size = padding_size; - _output = output; -} - -template <typename T> -inline void -SpaceToBatchND(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape, - const ::arm_compute::ITensor *block_size, const ::arm_compute::ITensor *padding_size, - const ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape, - T zero_value) -{ - const int input_batch = input_shape[3]; - const int input_height = input_shape[1]; - const int input_width = input_shape[0]; - - const int depth = output_shape[2]; - - const int padding_height_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 1})); - const int padding_height_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 1})); - const int padding_width_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 0})); - const int padding_width_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 0})); - const int padded_height = input_height + padding_height_left + padding_height_right; - const int padded_width = input_width + padding_width_left + padding_width_right; - - const int block_size_height = *reinterpret_cast<int *>(block_size->ptr_to_element({1})); - const int block_size_width = *reinterpret_cast<int *>(block_size->ptr_to_element({0})); - - assert(padding_height_left >= 0); - assert(padding_height_right >= 0); - assert(padding_width_left >= 0); - assert(padding_width_right >= 0); - assert(block_size_height >= 1); - assert(block_size_width >= 1); - assert(padded_height % block_size_height == 0); - assert(padded_width % block_size_width == 0); - assert(output->info()->dimension(3) == - input->info()->dimension(3) * (block_size_height * block_size_width)); - - for (int in_b = 0; in_b < input_batch; ++in_b) - { - for (int in_d = 0; in_d < depth; ++in_d) - { - for (int in_h = 0; in_h < padded_height; ++in_h) - { - for (int in_w = 0; in_w < padded_width; ++in_w) - { - const int out_d = in_d; - const int out_h = in_h / block_size_height; - const int out_w = in_w / block_size_width; - const int out_b = - in_b + - ((in_h % block_size_height) * block_size_width + in_w % block_size_width) * - input_batch; - - const ::arm_compute::Coordinates output_id{out_w, out_h, out_d, out_b}; - - if (in_h < padding_height_left || in_h >= (input_height + padding_height_left) || - in_w < padding_width_left || in_w >= (input_width + padding_width_left)) - { - *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = zero_value; - } - else - { - const ::arm_compute::Coordinates input_id{in_w - padding_width_left, - in_h - padding_height_left, in_d, in_b}; - *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = - *reinterpret_cast<T *>(input->ptr_to_element(input_id)); - } - } - } - } - } -} -void SimpleSpaceToBatchND::run() -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->map(q); - CAST_CL(_block_size)->map(q); - CAST_CL(_padding_size)->map(q); - CAST_CL(_output)->map(q); - } - - switch (_input->info()->data_type()) - { - case ::arm_compute::DataType::U8: - case ::arm_compute::DataType::QASYMM8: - SpaceToBatchND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _padding_size, - _output, _output->info()->tensor_shape(), - _input->info()->quantization_info().offset); - break; - case ::arm_compute::DataType::F32: - SpaceToBatchND<float>(_input, _input->info()->tensor_shape(), _block_size, _padding_size, - _output, _output->info()->tensor_shape(), 0.0f); - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - break; - } - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->unmap(q); - CAST_CL(_block_size)->unmap(q); - CAST_CL(_padding_size)->unmap(q); - CAST_CL(_output)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h deleted file mode 100644 index 4af961d34..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SIMPLE_SPACE_TO_BATCHND_H__ -#define __SIMPLE_SPACE_TO_BATCHND_H__ - -#include "internal/arm_compute.h" - -class SimpleSpaceToBatchND : public ::arm_compute::IFunction -{ -public: - SimpleSpaceToBatchND(void) - : _input(nullptr), _block_size(nullptr), _padding_size(nullptr), _output(nullptr) - { - // DO NOTHING - } - - /** Initialise input and output - * - * @param[in] input First tensor input. - * @param[in] block_size Block size. - * @param[in] padding_size Padding size. - * @param[out] output Output tensor. - */ - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *block_size, - ::arm_compute::ITensor *padding_size, ::arm_compute::ITensor *output); - - void run() override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_block_size; - ::arm_compute::ITensor *_padding_size; - ::arm_compute::ITensor *_output; -}; - -#endif /*__SIMPLE_SPACE_TO_BATCHND_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc deleted file mode 100644 index 3519da1f3..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleSpaceToDepth.h" - -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - int32_t block_size, const ::arm_compute::Coordinates &axises) -{ - const auto rank = axises.num_dimensions(); - assert(rank == 4); - for (int i = 0; i < rank; ++i) - { - assert(axises[i] >= 0); - assert(axises[i] < rank); - } - - _input = input; - _output = output; - _block_size = block_size; - _axises = axises; -} - -template <typename T> -inline void SpaceToDepth(const ::arm_compute::ITensor *input, - const ::arm_compute::TensorShape &input_shape, int32_t block_size, - ::arm_compute::ITensor *output, - const ::arm_compute::TensorShape &output_shape, - const ::arm_compute::Coordinates &axises) -{ - const int input_batch = input_shape[axises[0]]; - const int input_height = input_shape[axises[1]]; - const int input_width = input_shape[axises[2]]; - const int input_depth = input_shape[axises[3]]; - - for (int in_b = 0; in_b < input_batch; ++in_b) - { - for (int in_h = 0; in_h < input_height; ++in_h) - { - for (int in_w = 0; in_w < input_width; ++in_w) - { - for (int in_d = 0; in_d < input_depth; ++in_d) - { - const int out_b = in_b; - const int out_h = in_h / block_size; - const int out_w = in_w / block_size; - const int out_d = - in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth; - - auto input_id = - asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); - auto output_id = asARMComputeCoordinates( - ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); - - *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = - *reinterpret_cast<T *>(input->ptr_to_element(input_id)); - } - } - } - } -} - -void SimpleSpaceToDepth::run() -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->map(q); - CAST_CL(_output)->map(q); - } - - switch (_input->info()->data_type()) - { - case ::arm_compute::DataType::U8: - case ::arm_compute::DataType::QASYMM8: - SpaceToDepth<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::F32: - SpaceToDepth<float>(_input, _input->info()->tensor_shape(), _block_size, _output, - _output->info()->tensor_shape(), _axises); - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - break; - } - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->unmap(q); - CAST_CL(_output)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h deleted file mode 100644 index 9e87c364c..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file SimpleSpaceToDepth.h - * @brief This file contains SimpleSpaceToDepth class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __SIMPLE_SPACE_TO_DEPTH_H__ -#define __SIMPLE_SPACE_TO_DEPTH_H__ - -#include "internal/arm_compute.h" -#include "internal/arm_compute/Cast.h" - -/** - * @brief Class to run SimpleEmbeddingLookup Layer - */ -class SimpleSpaceToDepth : public ::arm_compute::IFunction -{ -public: - SimpleSpaceToDepth(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{} - { - // DO NOTHING - } - - /** - * @brief Configure the layer - * @param[in] input First tensor input. - * @param[in] output Output tensor. - * @param[in] block_size Block size. - * @param[in] axises Axises of rank 4 - * @return N/A - */ - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size, - const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); - - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run() override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_output; - int32_t _block_size; - ::arm_compute::Coordinates _axises; -}; - -#endif /*__SIMPLE_SPACE_TO_DEPTH_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc deleted file mode 100644 index abc291289..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "internal/layers/SimpleTransposeConv.h" -#include <arm_compute/runtime/CL/CLScheduler.h> - -void SimpleTransposeConv::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, - ::arm_compute::ITensor *output, - ::arm_compute::PadStrideInfo &tconv_info, - ::arm_compute::Coordinates axises) -{ - auto rank = axises.num_dimensions(); - - assert(rank == 4); - - _input = input; - _weights = weights; - _output = output; - _stride_width = tconv_info.stride().first; - _stride_height = tconv_info.stride().second; - _pad_width = tconv_info.pad_left(); - _pad_height = tconv_info.pad_top(); - _axises = axises; -} - -template <typename T> -inline void ApplyTransposeConv( - const ::arm_compute::TensorShape &input_shape, const ::arm_compute::ITensor *input_data, - const ::arm_compute::TensorShape &filter_shape, const ::arm_compute::ITensor *filter_data, - const ::arm_compute::TensorShape &output_shape, const ::arm_compute::ITensor *output_data, - const int32_t stride_width, const int32_t stride_height, const int32_t pad_width, - const int32_t pad_height, const ::arm_compute::Coordinates axises) -{ - const int batches = input_shape[axises[0]]; - const int input_height = input_shape[axises[1]]; - const int input_width = input_shape[axises[2]]; - const int input_depth = input_shape[axises[3]]; - - const int filter_height = filter_shape[axises[1]]; - const int filter_width = filter_shape[axises[2]]; - - const int output_height = output_shape[axises[1]]; - const int output_width = output_shape[axises[2]]; - const int output_depth = output_shape[axises[3]]; - - assert(batches == output_shape[axises[0]]); - assert(input_depth == filter_shape[axises[3]]); - assert(filter_shape[axises[0]] == output_depth); - - // Although transpose convolution simplifies to convolution with transposed - // weights for strides of 1, non-unitary striding complicates matters. To - // keep this reference implementation as clear as possible, we use a - // "scatter" access pattern, where we loop through all the input elements, - // computing their influence on the output, rather than looping through the - // output elements in the typical "gather" access pattern of a conv. We - // therefore must initialize the output array to zero. - - // Loop through input elements one at a time. - for (int batch = 0; batch < batches; ++batch) - { - for (int in_y = 0; in_y < input_height; ++in_y) - { - for (int in_x = 0; in_x < input_width; ++in_x) - { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) - { - // Loop through the output elements it will influence - const int out_x_origin = (in_x * stride_width) - pad_width; - const int out_y_origin = (in_y * stride_height) - pad_height; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) - { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) - { - for (int out_channel = 0; out_channel < output_depth; ++out_channel) - { - // Compute output element location - const int out_x = out_x_origin + filter_x; - const int out_y = out_y_origin + filter_y; - // We cannot accumulate out of bounds - if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && - (out_y < output_height)) - { - auto input_id = asARMComputeCoordinates( - ::arm_compute::Coordinates{batch, in_y, in_x, in_channel}, axises); - auto filter_id = asARMComputeCoordinates( - ::arm_compute::Coordinates{in_channel, filter_y, filter_x, out_channel}, - axises); - auto output_id = asARMComputeCoordinates( - ::arm_compute::Coordinates{batch, out_y, out_x, out_channel}, axises); - T input_value = *reinterpret_cast<T *>(input_data->ptr_to_element(input_id)); - T filter_value = *reinterpret_cast<T *>(filter_data->ptr_to_element(filter_id)); - *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) += - input_value * filter_value; - } - } - } - } - } - } - } - } -} - -void SimpleTransposeConv::run() -{ - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->map(q); - CAST_CL(_weights)->map(q); - CAST_CL(_output)->map(q); - } - - switch (_input->info()->data_type()) - { - case ::arm_compute::DataType::S32: - ApplyTransposeConv<int32_t>(_input->info()->tensor_shape(), _input, - _weights->info()->tensor_shape(), _weights, - _output->info()->tensor_shape(), _output, _stride_width, - _stride_height, _pad_width, _pad_height, _axises); - break; - case ::arm_compute::DataType::F32: - ApplyTransposeConv<float>(_input->info()->tensor_shape(), _input, - _weights->info()->tensor_shape(), _weights, - _output->info()->tensor_shape(), _output, _stride_width, - _stride_height, _pad_width, _pad_height, _axises); - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - break; - } - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - - CAST_CL(_input)->unmap(q); - CAST_CL(_weights)->unmap(q); - CAST_CL(_output)->unmap(q); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h deleted file mode 100644 index c5519828b..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __TRANSPOSE_CONV_EX__ -#define __TRANSPOSE_CONV_EX__ - -#include "internal/arm_compute.h" -#include "internal/arm_compute/Cast.h" - -class SimpleTransposeConv : public ::arm_compute::IFunction -{ -public: - SimpleTransposeConv() - : _input(nullptr), _weights(nullptr), _output(nullptr), _stride_width(0), _stride_height(0), - _pad_width(0), _pad_height(0) - { - // DO NOTHING - } - - /** Initialise input and output - * - * @param[in] input First tensor input. - * @param[in] weights Weights - * @param[out] output Output tensor. - * @param[in] tc_info Contains padding and policies to be used in the deconvolution, - * this is decribed in @ref PadStrideInfo. - * @param[in] axises Axises of rank 4 - */ - void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, - ::arm_compute::ITensor *output, ::arm_compute::PadStrideInfo &tconv_info, - ::arm_compute::Coordinates axises = getARMComputeAxises(4)); - - void run() override; - -private: - ::arm_compute::ITensor *_input; - ::arm_compute::ITensor *_weights; - ::arm_compute::ITensor *_output; - int32_t _stride_width; - int32_t _stride_height; - int32_t _pad_width; - int32_t _pad_height; - ::arm_compute::Coordinates _axises; -}; - -#endif /*__TRANSPOSE_CONV_EX__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc deleted file mode 100644 index 910595a44..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "internal/arm_compute.h" -#include "SimpleUnpackLayer.h" - -void SimpleUnpackLayer::configure(::arm_compute::ICLTensor *input, - const std::vector<::arm_compute::ICLTensor *> &output_vector, - int32_t axis) -{ - uint32_t nr_outputs = output_vector.size(); - _cl_permuted_vector.resize(nr_outputs); - _cl_permute_vector.resize(nr_outputs); - uint32_t input_rank = input->info()->num_dimensions(); - const ::arm_compute::PermutationVector pv{2, 0, 1}; - _input = input; - // Negatige axis is supported, -1 implies R-1 axis where R is input rank - if (axis < 0) - { - axis += input_rank; - } - _axis = ToARMComputeAxis(input_rank, axis).value(); - _cl_reshape_vector.resize(nr_outputs); - - ::arm_compute::TensorShape subTensor_shape{}; - for (int i = 0; i < input_rank; i++) - { - if (i != _axis) - { - subTensor_shape.set(i, _input->info()->tensor_shape()[i]); - } - else - { - subTensor_shape.set(i, 1); - } - } - - auto subTensor_offset = ::arm_compute::Coordinates{}; - subTensor_offset.set_num_dimensions(input_rank); - - for (int i = 0; i < output_vector.size(); i++) - { - _output_vector.push_back(output_vector[i]); - subTensor_offset[_axis] = i; - auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>( - CAST_CL(_input), subTensor_shape, subTensor_offset, true); - _sub_tensor_vector.push_back(temp_tensor); - // Copies into the subtensor - _cl_permute_vector[i].configure(_sub_tensor_vector[i].get(), &_cl_permuted_vector[i], pv); - _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], CAST_CL(_output_vector[i])); - _cl_permuted_vector[i].allocator()->allocate(); - } -} - -void SimpleUnpackLayer::run(void) -{ - for (int i = 0; i < _output_vector.size(); i++) - { - _cl_permute_vector[i].run(); - _cl_reshape_vector[i].run(); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h deleted file mode 100644 index 52fc7513d..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __UNPACK_LAYER_H__ -#define __UNPACK_LAYER_H__ - -#include <arm_compute/runtime/CL/CLTensor.h> -#include <arm_compute/runtime/CL/CLSubTensor.h> -#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> -#include <arm_compute/runtime/CL/functions/CLPermute.h> - -class SimpleUnpackLayer : public ::arm_compute::IFunction -{ -public: - SimpleUnpackLayer(void) - : _cl_permuted_vector{}, _output_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{}, - _cl_permute_vector{}, _input(nullptr), _axis(0) - { - // DO NOTHING - } - -public: - void configure(::arm_compute::ICLTensor *input, - const std::vector<::arm_compute::ICLTensor *> &output_vector, int32_t axis); - -public: - void run(void) override; - -private: - std::vector<::arm_compute::CLTensor> _cl_permuted_vector; - std::vector<::arm_compute::ICLTensor *> _output_vector; - std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector; - std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector; - std::vector<::arm_compute::CLPermute> _cl_permute_vector; - ::arm_compute::ICLTensor *_input; - int32_t _axis; -}; - -#endif // __UNPACK_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h index cc51db594..fc6d490da 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h @@ -49,7 +49,7 @@ public: Reader(const ::nnfw::misc::tensor::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { - assert(shape.element_nums() * sizeof(T) == len); + assert(shape.num_elements() * sizeof(T) == len); initialize(); } diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h index f8f297f97..4766851b9 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h @@ -47,7 +47,7 @@ public: // NOTE The parameter len denotes the number of bytes. View(const ::nnfw::misc::tensor::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { - assert(shape.element_nums() * sizeof(T) == len); + assert(shape.num_elements() * sizeof(T) == len); } public: diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc index f91f834d6..f4d1ca3c5 100644 --- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc +++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc @@ -74,7 +74,7 @@ Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, // 6 -> Padding_bottom index // 7 -> Stride (width) Index // 8 -> Stride (height) INdex - // 9 -> Depthwise Multipler + // 9 -> Depthwise Multiplier // 10 -> Activation Index ifm_index = inputs[0]; ker_index = inputs[1]; @@ -85,7 +85,7 @@ Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, padding_bottom_index = inputs[6]; hstride_index = inputs[7]; vstride_index = inputs[8]; - multipler_index = inputs[9]; + multiplier_index = inputs[9]; activation_index = inputs[10]; } @@ -109,7 +109,7 @@ Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index // 4 -> Stride (width) Index // 5 -> Stride (height) INdex - // 6 -> Depthwise Multipler + // 6 -> Depthwise Multiplier // 7 -> Activation Index ifm_index = inputs[0]; ker_index = inputs[1]; @@ -117,7 +117,7 @@ Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, padding_index = inputs[3]; hstride_index = inputs[4]; vstride_index = inputs[5]; - multipler_index = inputs[6]; + multiplier_index = inputs[6]; activation_index = inputs[7]; } diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h index c63e30aae..01a9e48be 100644 --- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h @@ -57,7 +57,7 @@ struct Param int32_t padding_top_index; /**< Index of padding top */ int32_t padding_bottom_index; /**< Index of padding bottom */ - int32_t multipler_index; /**< Index of multipler */ + int32_t multiplier_index; /**< Index of multipler */ int32_t activation_index; /**< Index of activation */ /** * @brief Construct as default @@ -133,7 +133,7 @@ struct Param int32_t vstride_index; /**< Index of vertical stride */ int32_t padding_index; /**< Index of padding */ - int32_t multipler_index; /**< Index of multipler */ + int32_t multiplier_index; /**< Index of multipler */ int32_t activation_index; /**< Index of activation */ /** * @brief Construct as default diff --git a/runtimes/pure_arm_compute/src/internal/op/Gather.cc b/runtimes/pure_arm_compute/src/internal/op/Gather.cc index 6c0dbaf75..bc517d28c 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Gather.cc +++ b/runtimes/pure_arm_compute/src/internal/op/Gather.cc @@ -53,11 +53,11 @@ Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, // Each input should be interpreted as follows: // - // 0 -> LHS Tensor Index - // 1 -> RHS Tensor Index + // 0 -> input Tensor Index + // 1 -> indices Tensor Index // 2 -> axis Index - lhs_index = inputs[0]; - rhs_index = inputs[1]; + ifm_index = inputs[0]; + indices_index = inputs[1]; axis_index = inputs[2]; } diff --git a/runtimes/pure_arm_compute/src/internal/op/Gather.h b/runtimes/pure_arm_compute/src/internal/op/Gather.h index 4470236eb..d40794f99 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Gather.h +++ b/runtimes/pure_arm_compute/src/internal/op/Gather.h @@ -43,9 +43,9 @@ struct Param { int32_t ofm_index; //!< index for output feature map - int32_t lhs_index; //!< index for lhs tensor - int32_t rhs_index; //!< index for rhs tensor - int32_t axis_index; //!< index for axis + int32_t ifm_index; //!< index for ifm tensor + int32_t indices_index; //!< index for indices tensor + int32_t axis_index; //!< index for axis /** * @brief Default Constructor diff --git a/runtimes/pure_arm_compute/src/internal/op/Split.cc b/runtimes/pure_arm_compute/src/internal/op/Split.cc index cbd863fce..6457a106a 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Split.cc +++ b/runtimes/pure_arm_compute/src/internal/op/Split.cc @@ -47,13 +47,16 @@ namespace Split Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs) { - assert(inputCount == 2); + assert(inputCount == 3); // Each input should be interpreted as follows: - // 0 -> A 0-D int32 tensor, indicating the dimension along which to split. - // 1 -> An n-D tensor, specifying the tensor to be split. - axis_index = inputs[0]; - ifm_index = inputs[1]; + // 0 -> An n-D tensor, specifying the tensor to be split. + // 1 -> A 0-D int32 tensor, indicating the dimension along which to split. + // 2 -> A 0-D int32 tensor, indicating the number of outputs + // (It can be ignored on pacl becasue pacl don't support dynamic tensor shape, + // and can be used for verification only) + ifm_index = inputs[0]; + axis_index = inputs[1]; // Each output should be interpreted as follow: // [0, outputCount) -> An n-D tensor. diff --git a/runtimes/pure_arm_compute/src/internal/op/Split.h b/runtimes/pure_arm_compute/src/internal/op/Split.h index b2c6c2fd1..cb5f3eb2d 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Split.h +++ b/runtimes/pure_arm_compute/src/internal/op/Split.h @@ -42,8 +42,8 @@ namespace Split */ struct Param { - int32_t axis_index; //!< index for axis int32_t ifm_index; //!< index for input feature map + int32_t axis_index; //!< index for axis std::vector<int32_t> ofm_indexes; //!< index for output feature map |