Diffstat (limited to 'runtimes/pure_arm_compute/src/internal/layers')
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h | 25
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc | 8
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h | 33
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc | 4
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h | 27
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc | 78
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc | 181
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h | 51
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h | 25
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc | 110
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h | 51
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc | 79
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h | 93
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc | 110
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h | 51
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc | 137
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h | 43
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc | 140
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h | 46
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc | 75
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h (renamed from runtimes/pure_arm_compute/src/internal/layers/PadLayer.h) | 80
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc | 77
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h | 51
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc | 172
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h | 44
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc | 53
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h | 39
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc | 142
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h | 50
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc | 73
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h | 35
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc | 155
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h | 59
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc | 74
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h | 51
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc | 40
-rw-r--r--  runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h | 35
37 files changed, 2194 insertions, 403 deletions
diff --git a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h
index 502a1ee0e..83ae7c17b 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        FeatureLoggingLayer.h
+ * @brief       This file contains FeatureLoggingLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __FEATURE_LOGGING_LAYER_H__
#define __FEATURE_LOGGING_LAYER_H__
@@ -27,9 +33,24 @@
#include "internal/arm_compute.h"
+/**
+ * @brief Class to run FeatureLogging Layer
+ */
class FeatureLoggingLayer : public ::arm_compute::IFunction
{
public:
+ FeatureLoggingLayer(void) : _tag(""), _target(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] tag Text tag for this layer
+ * @param[in] target The feature tensor to be printed
+ * @return N/A
+ */
void configure(const std::string &tag, ::arm_compute::ITensor *target)
{
_tag = tag;
@@ -37,6 +58,10 @@ public:
}
public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run(void) override
{
if (::internal::arm_compute::isGpuMode())
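A minimal usage sketch for FeatureLoggingLayer as declared above, assuming CPU (NEON) mode; the tensor name, shape, and tag below are illustrative only and not part of this patch:

    #include <arm_compute/core/TensorInfo.h>
    #include <arm_compute/runtime/Tensor.h>
    #include "internal/layers/FeatureLoggingLayer.h"

    ::arm_compute::Tensor feature;
    feature.allocator()->init(::arm_compute::TensorInfo(
        ::arm_compute::TensorShape(8U, 8U, 3U), 1, ::arm_compute::DataType::F32));
    feature.allocator()->allocate();

    FeatureLoggingLayer logger;
    logger.configure("conv1_output", &feature); // free-form text tag
    logger.run();                               // prints the tagged feature tensor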
diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc
index 311284efc..28789a801 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc
@@ -17,8 +17,6 @@
#include "GenericFullyConnectedLayer.h"
#include "internal/arm_compute.h"
-#include <arm_compute/core/Helpers.h>
-
void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input,
::arm_compute::ITensor *weights,
::arm_compute::ITensor *biases,
@@ -56,9 +54,9 @@ void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input,
{
// reshape
auto_init_if_empty(*_neon_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape));
- _generic_reshape.configure(CAST_NE(_input), &_neon_buffer);
+ _generic_reshape.configure(_input, &_neon_buffer);
- _neon_fc.configure(&_neon_buffer, CAST_NE(_weights), CAST_NE(_biases), CAST_NE(_output));
+ _neon_fc.configure(&_neon_buffer, _weights, _biases, _output);
// NOTE _neon_buffer is inaccessible from outside, and thus it is safe to invoke allocate
// here.
@@ -66,7 +64,7 @@ void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input,
}
else
{
- _neon_fc.configure(CAST_NE(_input), CAST_NE(_weights), CAST_NE(_biases), CAST_NE(_output));
+ _neon_fc.configure(_input, _weights, _biases, _output);
}
}
}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h
index 55d8683da..f1519f54d 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h
@@ -14,23 +14,52 @@
* limitations under the License.
*/
+/**
+ * @file        GenericFullyConnectedLayer.h
+ * @brief       This file contains GenericFullyConnectedLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __GENERIC_FULLY_CONNECTED_LAYER_H__
#define __GENERIC_FULLY_CONNECTED_LAYER_H__
-#include <arm_compute/runtime/Tensor.h>
-#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include "internal/layers/GenericReshapeLayer.h"
+/**
+ * @brief Class to run FullyConnected Layer with both CPU and GPU
+ */
class GenericFullyConnectedLayer : public ::arm_compute::IFunction
{
public:
+ GenericFullyConnectedLayer(void)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
+ _neon_buffer{}, _cl_fc{}, _neon_fc{}, _generic_reshape{}, _needs_reshape(false)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] input The source tensor
+ * @param[in] weights The tensor that is filled with weight values
+ * @param[in] biases The tensor that is filled with bias values
+ * @param[in] output The destination tensor
+ * @param[in] needs_reshape Whether it needs to be reshaped or not
+ * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true.
+ * @return N/A
+ */
void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights,
::arm_compute::ITensor *biases, ::arm_compute::ITensor *output, bool needs_reshape,
::arm_compute::TensorShape reshape);
public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run(void) override;
private:
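A sketch of the configure()/run() contract above, assuming input, weights, biases, and output are pre-allocated ::arm_compute::ITensor instances created elsewhere; with needs_reshape set to false the trailing TensorShape argument is ignored, per the doc comment:

    GenericFullyConnectedLayer fc;
    fc.configure(&input, &weights, &biases, &output,
                 /* needs_reshape */ false, ::arm_compute::TensorShape{});
    fc.run(); // must be called after configure()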
diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc
index 2cdfe1b6e..c38c2e9e3 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc
@@ -43,8 +43,8 @@ void GenericReshapeLayer::configure(::arm_compute::ITensor *input, ::arm_compute
}
else
{
- _neon_permute.configure(CAST_NE(input), &_neon_permuted, pv);
- _neon_reshape.configure(&_neon_permuted, CAST_NE(output));
+ _neon_permute.configure(input, &_neon_permuted, pv);
+ _neon_reshape.configure(&_neon_permuted, output);
// NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here.
_neon_permuted.allocator()->allocate();
diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h
index 1def21085..a22c14c8b 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        GenericReshapeLayer.h
+ * @brief       This file contains GenericReshapeLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __GENERIC_RESHAPE_LAYER_H__
#define __GENERIC_RESHAPE_LAYER_H__
@@ -25,12 +31,33 @@
#include <arm_compute/runtime/NEON/functions/NEPermute.h>
#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+/**
+ * @brief Class to run Reshape Layer with both CPU and GPU
+ */
class GenericReshapeLayer : public ::arm_compute::IFunction
{
public:
+ GenericReshapeLayer(void)
+ : _input(nullptr), _output(nullptr), _cl_permuted{}, _neon_permuted{}, _cl_permute{},
+ _cl_reshape{}, _neon_permute{}, _neon_reshape{}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] input The source tensor
+ * @param[in] output The destination tensor
+ * @return N/A
+ */
void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output);
public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run(void) override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc
deleted file mode 100644
index 4a5370587..000000000
--- a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-#include "PadLayer.h"
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-void PadLayer::configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,
- unsigned int border_width)
-{
- _input = input;
- _output = output;
- _border_width = border_width;
- _output_height = _output->info()->dimension(0);
- _output_width = _output->info()->dimension(1);
-
- uint8_t constant_border_value = 0;
- ::arm_compute::PixelValue constant_pixel_value = ::arm_compute::PixelValue(constant_border_value);
-
- unsigned int padding_size = _border_width;
- input->info()->extend_padding(::arm_compute::PaddingSize{padding_size});
- _fillborderkernel.configure(input, _border_width, ::arm_compute::BorderMode::CONSTANT,
- constant_pixel_value);
-}
-
-void PadLayer::run(void)
-{
- _fillborderkernel.run();
-
- ::arm_compute::Coordinates coordinates =
- ::arm_compute::Coordinates(-_border_width, -_border_width);
- ::arm_compute::TensorShape new_tensor_shape =
- ::arm_compute::TensorShape(_output_height, _output_width);
-
- /* NOTE: The cl kernel fills the data in the borders(not in the tensor).
- Once the tensor is received back at NNAPI, we are adjusting
- the valid region in such a way that the padding becomes part of the tensor itself
- and matches the size of output. */
- _input->info()->set_valid_region(::arm_compute::ValidRegion(coordinates, new_tensor_shape));
-
- /* NOTE: Since cl kernel does not have an argument for output tensor while NNAPI does.
- We need to map the input (tensor that is passed to the cl kernel) back to
- output. */
-
- // TODO: Write a modified CLCopy kernel to do this job.
- populateOutput();
-}
-
-void PadLayer::populateOutput()
-{
- auto &queue = ::arm_compute::CLScheduler::get().queue();
- _input->map(queue);
- _output->map(queue);
-
- auto input_tensor = static_cast<::arm_compute::ITensor *>(_input);
- auto const source_data = input_tensor->buffer();
-
- auto output_tensor = static_cast<::arm_compute::ITensor *>(_output);
- auto dst_data = output_tensor->buffer();
-
- memmove(dst_data, source_data, _output_height * _output_width * 4);
-
- _input->unmap(queue);
- _output->unmap(queue);
-}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc
new file mode 100644
index 000000000..6d348e814
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleArgMinMax.h"
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleArgMinMax::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ std::vector<uint32_t> axis, ::arm_compute::ArgOperation op)
+{
+ _input = input;
+ _output = output;
+ _axis = axis;
+ _input_rank = input->info()->num_dimensions();
+ _op_type = op;
+}
+
+inline const ::arm_compute::TensorShape
+inferOutputShape(const ::arm_compute::TensorShape &input_shape, const std::vector<uint32_t> &axis,
+ int input_rank)
+{
+ ::arm_compute::TensorShape out_shape{};
+ size_t dim = 1;
+ for (int i = 0; i < input_rank; ++i)
+ {
+ dim = input_shape[i];
+ out_shape.set(i, dim);
+ }
+
+ for (int i = 0; i < axis.size(); ++i)
+ {
+ out_shape.set(axis[i], 1);
+ }
+
+ return out_shape;
+}
+
+template <typename T>
+inline T getArgMinMaxEle(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape,
+ const ::arm_compute::TensorShape &output_shape, const size_t b,
+ const size_t d, const size_t h, const size_t w, const int axis,
+ const ::arm_compute::ArgOperation op_type)
+{
+ // If output[dimension] == 1, that dimension is being reduced, so every input value along it
+ // is examined.
+ // Otherwise only the single value at the given position is checked.
+ const size_t start_b = output_shape[3] == 1 ? 0 : b;
+ const size_t start_d = output_shape[2] == 1 ? 0 : d;
+ const size_t start_h = output_shape[1] == 1 ? 0 : h;
+ const size_t start_w = output_shape[0] == 1 ? 0 : w;
+ const size_t stop_b = output_shape[3] == 1 ? input_shape[3] - 1 : b;
+ const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d;
+ const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h;
+ const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w;
+
+ ::arm_compute::Coordinates id{w, h, d, b};
+ ::arm_compute::Coordinates min_max_id{w, h, d, b};
+
+ T value = *reinterpret_cast<T *>(input->ptr_to_element(id));
+ T tval = *reinterpret_cast<T *>(input->ptr_to_element(id));
+
+ for (size_t in_b = start_b; in_b <= stop_b; ++in_b)
+ {
+ id.set(3, in_b);
+ for (size_t in_d = start_d; in_d <= stop_d; ++in_d)
+ {
+ id.set(2, in_d);
+ for (size_t in_h = start_h; in_h <= stop_h; ++in_h)
+ {
+ id.set(1, in_h);
+ for (size_t in_w = start_w; in_w <= stop_w; ++in_w)
+ {
+ id.set(0, in_w);
+ if (op_type == ::arm_compute::ArgOperation::MIN)
+ {
+ value = std::min<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id)));
+ }
+ else if (op_type == ::arm_compute::ArgOperation::MAX)
+ {
+ value = std::max<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id)));
+ }
+ else
+ throw std::runtime_error("This Arg operation is not supported, yet");
+
+ if (tval != value)
+ {
+ min_max_id = id;
+ tval = value;
+ }
+ }
+ }
+ }
+ }
+
+ return min_max_id[axis];
+}
+
+template <typename T>
+inline void
+getArgMinMax(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape,
+ const ::arm_compute::TensorShape &output_shape, ::arm_compute::ITensor *output,
+ const int axis, const ::arm_compute::ArgOperation op_type)
+{
+ ::arm_compute::Coordinates id;
+ for (size_t out_b = 0; out_b < output_shape[3]; ++out_b)
+ {
+ id.set(3, out_b);
+ for (size_t out_d = 0; out_d < output_shape[2]; ++out_d)
+ {
+ id.set(2, out_d);
+ for (size_t out_h = 0; out_h < output_shape[1]; ++out_h)
+ {
+ id.set(1, out_h);
+ for (size_t out_w = 0; out_w < output_shape[0]; ++out_w)
+ {
+ id.set(0, out_w);
+ *reinterpret_cast<int *>(output->ptr_to_element(id)) = getArgMinMaxEle<T>(
+ input, input_shape, output_shape, out_b, out_d, out_h, out_w, axis, op_type);
+ }
+ }
+ }
+ }
+}
+
+void SimpleArgMinMax::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ ::arm_compute::TensorShape input_shape = _input->info()->tensor_shape();
+
+ // Only a single reduction axis is supported for now (the axis vector has exactly one entry).
+ // TODO support axis size > 1.
+ int axis_val = _axis[0];
+ ::arm_compute::TensorShape output_shape = inferOutputShape(input_shape, _axis, _input_rank);
+
+ _output->info()->set_tensor_shape(output_shape);
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::QASYMM8:
+ getArgMinMax<uint8_t>(_input, input_shape, output_shape, _output, axis_val, _op_type);
+ break;
+ case ::arm_compute::DataType::S32:
+ getArgMinMax<int32_t>(_input, input_shape, output_shape, _output, axis_val, _op_type);
+ break;
+ case ::arm_compute::DataType::F32:
+ getArgMinMax<float>(_input, input_shape, output_shape, _output, axis_val, _op_type);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ _output->info()->set_tensor_shape(output_shape);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h
new file mode 100644
index 000000000..b90e74579
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_ARG_MIN_MAX_H__
+#define __SIMPLE_ARG_MIN_MAX_H__
+
+#include "internal/arm_compute.h"
+#include "arm_compute/core/TypesEx.h"
+
+class SimpleArgMinMax : public ::arm_compute::IFunction
+{
+public:
+ SimpleArgMinMax(void) : _input(nullptr), _output(nullptr), _axis(), _input_rank(0)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /** Initialise input and output
+ *
+ * @param[in] input First tensor input.
+ * @param[out] output Output tensor.
+ * @param[in] axis Dimension along which to find Min or Max Index.
+ */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ std::vector<uint32_t> axis, ::arm_compute::ArgOperation _op_type);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+ std::vector<uint32_t> _axis;
+ int _input_rank;
+ ::arm_compute::ArgOperation _op_type;
+};
+
+#endif /*__SIMPLE_ARG_MIN_MAX_H__ */
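A brief usage sketch of SimpleArgMinMax as declared above, assuming CPU mode and pre-allocated tensors: an F32 input and an S32 output (S32 because SimpleArgMinMax.cc writes the resulting indices as int), with a single reduction axis:

    SimpleArgMinMax argmax;
    std::vector<uint32_t> axis{0}; // reduce along ACL dimension 0
    argmax.configure(&input, &output, axis, ::arm_compute::ArgOperation::MAX);
    argmax.run(); // output now holds the index of the maximum along the reduced axis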
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h
index 31c927b4f..aed9ae286 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h
@@ -14,15 +14,36 @@
* limitations under the License.
*/
+/**
+ * @file        SimpleArithmeticAddition.h
+ * @brief       This file contains SimpleArithmeticAddition class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __SIMPLE_ARITHMETIC_ADDITION_H__
#define __SIMPLE_ARITHMETIC_ADDITION_H__
#include "internal/arm_compute.h"
#include <arm_compute/core/ITensor.h>
+/**
+ * @brief Class to run SimpleArithmeticAddition Layer
+ */
class SimpleArithmeticAddition : public ::arm_compute::IFunction
{
public:
+ SimpleArithmeticAddition(void) : _lhs(nullptr), _rhs(nullptr), _out(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ /**
+ * @brief Configure the layer
+ * @param[in] lhs Lefthand-side operand
+ * @param[in] rhs Righthand-side operand
+ * @param[in] out The destination tensor (result operand)
+ * @return N/A
+ */
void configure(::arm_compute::ITensor *lhs, ::arm_compute::ITensor *rhs,
::arm_compute::ITensor *out)
{
@@ -32,6 +53,10 @@ public:
}
public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run(void) override
{
if (::internal::arm_compute::isGpuMode())
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc
new file mode 100644
index 000000000..87175ee1a
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleBatchToSpaceNd.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleBatchToSpaceND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ const int32_t *block_size,
+ const ::arm_compute::Coordinates &axises)
+{
+ const auto rank = axises.num_dimensions();
+ assert(rank == 4);
+
+ for (int i = 0; i < rank; ++i)
+ assert(axises[i] >= 0 && axises[i] < rank);
+
+ _input = input;
+ _output = output;
+ _block_size = block_size;
+ _axises = axises;
+}
+
+template <typename T>
+inline void BatchToSpaceND(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape,
+ const int32_t *block_size_data, ::arm_compute::ITensor *output,
+ const ::arm_compute::TensorShape &output_shape,
+ const ::arm_compute::Coordinates &axises)
+{
+ const int output_batch = output_shape[axises[0]];
+ const int output_height = output_shape[axises[1]];
+ const int output_width = output_shape[axises[2]];
+ const int depth = output_shape[axises[3]];
+
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ for (int out_d = 0; out_d < depth; ++out_d)
+ {
+ const int in_d = out_d;
+ const int in_h = out_h / block_size_data[0];
+ const int in_w = out_w / block_size_data[1];
+ const int in_b =
+ out_b +
+ ((out_h % block_size_data[0]) * block_size_data[1] + out_w % block_size_data[1]) *
+ output_batch;
+
+ auto input_id =
+ asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
+
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
+ }
+ }
+ }
+ }
+}
+void SimpleBatchToSpaceND::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::U8:
+ case ::arm_compute::DataType::QASYMM8:
+ BatchToSpaceND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ case ::arm_compute::DataType::F32:
+ BatchToSpaceND<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h
new file mode 100644
index 000000000..5695d9719
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_BATCH_TO_SPACE_ND_H__
+#define __SIMPLE_BATCH_TO_SPACE_ND_H__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimpleBatchToSpaceND : public ::arm_compute::IFunction
+{
+public:
+ SimpleBatchToSpaceND(void) : _input(nullptr), _output(nullptr), _block_size(nullptr), _axises{}
+ {
+ // DO NOTHING
+ }
+
+ /** Initialise input and output
+ *
+ * @param[in] input First tensor input.
+ * @param[out] output Output tensor.
+ * @param[in] block_size Block size.
+ * @param[in] axises Axises of rank 4
+ */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ const int32_t *block_size,
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+ const int32_t *_block_size;
+ ::arm_compute::Coordinates _axises;
+};
+
+#endif /*__SIMPLE_BATCH_TO_SPACE_ND_H__ */
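A usage sketch of SimpleBatchToSpaceND as declared above, assuming pre-allocated 4-D input and output tensors created elsewhere; note that block_size is passed as a pointer to two int32_t values, matching how block_size_data[0] and block_size_data[1] are read in the .cc:

    int32_t block_size[2] = {2, 2}; // height and width block factors (illustrative)
    SimpleBatchToSpaceND b2s;
    b2s.configure(&input, &output, block_size); // axises defaults to getARMComputeAxises(4)
    b2s.run();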
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc
new file mode 100644
index 000000000..7c7706a78
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/layers/SimpleCastLayer.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleCastLayer::castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out,
+ const arm_compute::Coordinates &id)
+{
+ switch (in->info()->data_type())
+ {
+ case ::arm_compute::DataType::F32:
+ {
+ copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id);
+ break;
+ }
+ case ::arm_compute::DataType::S32:
+ {
+ copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id);
+ break;
+ }
+ case ::arm_compute::DataType::U32:
+ {
+ copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id);
+ break;
+ }
+ case ::arm_compute::DataType::QASYMM8:
+ {
+ const uint8_t quantizedValue = *(in->ptr_to_element(id));
+ copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id);
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+void SimpleCastLayer::configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out)
+{
+ _in = in;
+ _out = out;
+}
+
+void SimpleCastLayer::run(void)
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+ CAST_CL(_in)->map(q);
+ CAST_CL(_out)->map(q);
+ }
+
+ arm_compute::Window window;
+ window.use_tensor_dimensions(_out->info()->tensor_shape());
+
+ execute_window_loop(window,
+ [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); });
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+ CAST_CL(_out)->unmap(q);
+ CAST_CL(_in)->unmap(q);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h
index fa3006438..f9a48b481 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h
@@ -14,80 +14,55 @@
* limitations under the License.
*/
+/**
+ * @file        SimpleCastLayer.h
+ * @brief       This file contains SimpleCastLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __SIMPLE_CAST_LAYER_H__
#define __SIMPLE_CAST_LAYER_H__
-#include <arm_compute/core/ITensor.h>
-
#include "internal/arm_compute.h"
-#include "internal/op/Cast.h"
+#include "internal/arm_compute/Cast.h"
+/**
+ * @brief Class to run SimpleCast Layer
+ */
class SimpleCastLayer : public ::arm_compute::IFunction
{
public:
- void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out)
+ SimpleCastLayer(void) : _in(nullptr), _out(nullptr)
{
- _in = in;
- _out = out;
+ // DO NOTHING
}
-public:
- void run(void) override
- {
- if (::internal::arm_compute::isGpuMode())
- {
- auto &q = ::arm_compute::CLScheduler::get().queue();
- CAST_CL(_in)->map(q);
- CAST_CL(_out)->map(q);
- }
-
- arm_compute::Window window;
- window.use_tensor_dimensions(_out->info()->tensor_shape());
+ /**
+ * @brief Configure the layer
+ * @param[in] in The source tensor
+ * @param[in] out The destination tensor
+ * @return N/A
+ */
+ void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out);
- execute_window_loop(window,
- [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); });
-
- if (::internal::arm_compute::isGpuMode())
- {
- auto &q = ::arm_compute::CLScheduler::get().queue();
- CAST_CL(_out)->unmap(q);
- CAST_CL(_in)->unmap(q);
- }
- }
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
+ void run(void) override;
+private:
+ /**
+ * @brief Cast and copy data from one tensor to another
+ *
+ * @param[in] in The source tensor
+ * @param[out] out The destination tensor
+ * @param[in] id Coordinates to copy
+ * @return N/A
+ */
void castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out,
- const arm_compute::Coordinates &id)
- {
- switch (in->info()->data_type())
- {
- case ::arm_compute::DataType::F32:
- {
- copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id);
- break;
- }
- case ::arm_compute::DataType::S32:
- {
- copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id);
- break;
- }
- case ::arm_compute::DataType::U32:
- {
- copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id);
- break;
- }
- case ::arm_compute::DataType::QASYMM8:
- {
- const uint8_t quantizedValue = *(in->ptr_to_element(id));
- copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id);
- break;
- }
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
- }
+ const arm_compute::Coordinates &id);
-private:
::arm_compute::ITensor *_in;
::arm_compute::ITensor *_out;
};
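A self-contained sketch of the cast layer above under CPU mode; the shape and the F32-to-S32 direction are illustrative (both types are handled in castData()):

    #include <arm_compute/core/TensorInfo.h>
    #include <arm_compute/runtime/Tensor.h>
    #include "internal/layers/SimpleCastLayer.h"

    ::arm_compute::Tensor in, out;
    in.allocator()->init(::arm_compute::TensorInfo(
        ::arm_compute::TensorShape(4U), 1, ::arm_compute::DataType::F32));
    out.allocator()->init(::arm_compute::TensorInfo(
        ::arm_compute::TensorShape(4U), 1, ::arm_compute::DataType::S32));
    in.allocator()->allocate();
    out.allocator()->allocate();

    SimpleCastLayer cast;
    cast.configure(&in, &out); // element-wise cast, driven by the window loop in run()
    cast.run();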
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc
new file mode 100644
index 000000000..d62a8321b
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleDepthToSpace.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ int32_t block_size, const ::arm_compute::Coordinates &axises)
+{
+ const auto rank = axises.num_dimensions();
+ assert(rank == 4);
+ for (int i = 0; i < rank; ++i)
+ {
+ assert(axises[i] >= 0);
+ assert(axises[i] < rank);
+ }
+
+ _input = input;
+ _output = output;
+ _block_size = block_size;
+ _axises = axises;
+}
+
+template <typename T>
+inline void DepthToSpace(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape, int32_t block_size,
+ ::arm_compute::ITensor *output,
+ const ::arm_compute::TensorShape &output_shape,
+ const ::arm_compute::Coordinates &axises)
+{
+ const int output_batch = output_shape[axises[0]];
+ const int output_height = output_shape[axises[1]];
+ const int output_width = output_shape[axises[2]];
+ const int output_depth = output_shape[axises[3]];
+
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ for (int out_d = 0; out_d < output_depth; ++out_d)
+ {
+ const int in_b = out_b;
+ const int in_h = out_h / block_size;
+ const int in_w = out_w / block_size;
+ const int in_d =
+ out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth;
+
+ auto input_id =
+ asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
+
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
+ }
+ }
+ }
+ }
+}
+
+void SimpleDepthToSpace::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::U8:
+ case ::arm_compute::DataType::QASYMM8:
+ DepthToSpace<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ case ::arm_compute::DataType::F32:
+ DepthToSpace<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h
new file mode 100644
index 000000000..1032aaa47
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_DEPTH_TO_SPACE_H__
+#define __SIMPLE_DEPTH_TO_SPACE_H__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimpleDepthToSpace : public ::arm_compute::IFunction
+{
+public:
+ SimpleDepthToSpace(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /** Initialise input and output
+ *
+ * @param[in] input First tensor input.
+ * @param[out] output Output tensor.
+ * @param[in] block_size Block size.
+ * @param[in] axises Axises of rank 4
+ */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+ int32_t _block_size;
+ ::arm_compute::Coordinates _axises;
+};
+
+#endif /*__SIMPLE_DEPTH_TO_SPACE_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
index 089c783c1..ae740bb10 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
@@ -1,3 +1,18 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#include "internal/layers/SimpleEmbeddingLookup.h"
#include <arm_compute/runtime/CL/CLScheduler.h>
@@ -6,7 +21,8 @@ void SimpleEmbeddingLookup::configure(::arm_compute::ITensor *lookups,
::arm_compute::ITensor *values,
::arm_compute::ITensor *output)
{
- // Assume that verification of operands are already done at Planner::visit()
+ assert(values->info()->num_dimensions() == output->info()->num_dimensions());
+ assert(values->info()->num_dimensions() > 1 && values->info()->num_dimensions() <= 4);
_lookups = lookups;
_values = values;
_output = output;
@@ -25,85 +41,62 @@ void SimpleEmbeddingLookup::run()
// type of elements of lookups is always integer
const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer());
- const auto values_buf = _values->buffer();
- auto output_buf = _output->buffer();
const auto lookups_info = _lookups->info();
const auto values_info = _values->info();
const auto output_info = _output->info();
- // TODO Refactor below duplicated code!
- const auto values_rank = values_info->num_dimensions();
- switch (values_rank)
+ // NOTE The first dimension's position is always at the end of dimensions.
+ const auto first_dim_pos = values_info->num_dimensions() - 1;
+
+ const size_t first_dim = values_info->dimension(first_dim_pos);
+ for (size_t i = 0; i < lookups_info->dimension(0); ++i)
{
- case 2:
- // (H,W) in nnapi -> (W,H) in acl
- {
- const size_t row_size = values_info->dimension(1);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 3:
- // (B,H,W) in nnapi -> (W,H,B) in acl
- {
- const size_t row_size = values_info->dimension(2);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 4:
- // (N,H,W,C) in nnapi -> (N,C,H,W) in acl
- {
- const size_t row_size = values_info->dimension(3);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, 0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, 0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 1:
- // In this case, shape of values actually is matrix but the height(row size) is 1 in acl. If
- // row size is 1, this op is not needed and it means this situtation could be wrong.
- throw std::runtime_error("Wrong usage of EmbeddingLookup op!");
- default:
- throw std::runtime_error("Not supported rank!");
+ if (lookups_buf[i] < 0 || lookups_buf[i] >= first_dim)
+ throw std::runtime_error("Embedding Lookup: index out of bounds.");
}
+ // If the strides of values and output differ, the two tensors carry different padding,
+ // so the data cannot be copied in a single chunk.
+ auto can_copy_at_once = [&]() -> bool {
+ const auto &values_strides = values_info->strides_in_bytes();
+ const auto &output_strides = output_info->strides_in_bytes();
+
+ for (size_t i = 0; i < first_dim_pos; ++i)
+ {
+ if (values_strides[i] != output_strides[i])
+ return false;
+ }
+
+ return true;
+ };
+
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ size_t copy_bytes;
+ Window window;
+ if (can_copy_at_once())
+ {
+ copy_bytes = values_info->total_size() / first_dim;
+ window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+ }
+ else
+ {
+ copy_bytes = values_info->dimension(0) * values_info->element_size();
+ window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+ }
+
+ Iterator it(_output, window);
+ execute_window_loop(window,
+ [&](const ::arm_compute::Coordinates &id) {
+ ::arm_compute::Coordinates values_id = id;
+ const int idx = id[first_dim_pos];
+ values_id.set(first_dim_pos, lookups_buf[idx]);
+ memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+ },
+ it);
+
if (::internal::arm_compute::isGpuMode())
{
auto &q = ::arm_compute::CLScheduler::get().queue();
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
index 9f2cd977f..fd499437f 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
@@ -1,16 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#ifndef __SIMPLE_EMBEDDING_LOOKUP_H__
#define __SIMPLE_EMBEDDING_LOOKUP_H__
#include "internal/arm_compute.h"
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/runtime/IFunction.h>
+/**
+ * @file        SimpleEmbeddingLookup.h
+ * @brief       This file contains SimpleEmbeddingLookup class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+/**
+ * @brief Class to run SimpleEmbeddingLookup Layer
+ */
class SimpleEmbeddingLookup : public ::arm_compute::IFunction
{
public:
+ SimpleEmbeddingLookup(void) : _lookups(nullptr), _values(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] lookups 1D tensor which contains lookup values
+ * @param[in] values The source tensor
+ * @param[in] output The destination tensor
+ * @return N/A
+ */
void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *values,
::arm_compute::ITensor *output);
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run() override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc
new file mode 100644
index 000000000..7f8ae2505
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleHashtableLookupLayer.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleHashtableLookupLayer::configure(::arm_compute::ITensor *lookups,
+ ::arm_compute::ITensor *keys,
+ ::arm_compute::ITensor *values,
+ ::arm_compute::ITensor *output,
+ ::arm_compute::ITensor *hits)
+{
+ _lookups = lookups;
+ _keys = keys;
+ _values = values;
+ _output = output;
+ _hits = hits;
+ _lookup_indices.resize(lookups->info()->dimension(0), -1);
+}
+
+void SimpleHashtableLookupLayer::run()
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_lookups)->map(queue);
+ CAST_CL(_keys)->map(queue);
+ CAST_CL(_values)->map(queue);
+ CAST_CL(_output)->map(queue);
+ CAST_CL(_hits)->map(queue);
+ }
+
+ const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer());
+ const int32_t *keys_buf = reinterpret_cast<int32_t *>(_keys->buffer());
+ uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer());
+
+ const auto lookups_info = _lookups->info();
+ const auto values_info = _values->info();
+ const auto keys_info = _keys->info();
+ const auto output_info = _output->info();
+
+ // NOTE The first dimension's position is always at the end of dimensions.
+ const auto first_dim_pos = values_info->num_dimensions() - 1;
+ const size_t first_dim = values_info->dimension(first_dim_pos);
+
+ std::map<int32_t, size_t> key_map;
+ const int keys_num = keys_info->dimension(0);
+ for (size_t key_index = 0; key_index < keys_num; key_index++)
+ {
+ key_map[keys_buf[key_index]] = key_index;
+ }
+
+ const int lookups_num = lookups_info->dimension(0);
+ for (size_t i = 0; i < lookups_num; ++i)
+ {
+ const auto lookup_value = lookups_buf[i];
+ const auto it = key_map.find(lookup_value);
+ if (it != key_map.end())
+ {
+ if (it->second >= first_dim)
+ throw std::runtime_error("HashTable Lookup: index out of bounds.");
+ _lookup_indices[i] = it->second;
+ }
+ }
+
+ // If the strides of values and output differ, the two tensors carry different padding,
+ // so the data cannot be copied in a single chunk.
+ auto can_copy_at_once = [&]() -> bool {
+ const auto &values_strides = values_info->strides_in_bytes();
+ const auto &output_strides = output_info->strides_in_bytes();
+
+ for (size_t i = 0; i < first_dim_pos; ++i)
+ {
+ if (values_strides[i] != output_strides[i])
+ return false;
+ }
+
+ return true;
+ };
+
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Coordinates;
+
+ size_t copy_bytes;
+ Window window;
+ if (can_copy_at_once())
+ {
+ copy_bytes = values_info->total_size() / first_dim;
+ window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+ }
+ else
+ {
+ copy_bytes = values_info->dimension(0) * values_info->element_size();
+ window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+ }
+
+ Iterator it(_output, window);
+ execute_window_loop(window,
+ [&](const Coordinates &id) {
+ Coordinates values_id = id;
+ const int idx = id[first_dim_pos];
+ const int lookup_index = _lookup_indices[idx];
+ if (lookup_index >= 0)
+ {
+ values_id.set(first_dim_pos, lookup_index);
+ memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+ hits_buf[idx] = 1;
+ }
+ else
+ {
+ memset(it.ptr(), 0, copy_bytes);
+ hits_buf[idx] = 0;
+ }
+ },
+ it);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_lookups)->unmap(queue);
+ CAST_CL(_keys)->unmap(queue);
+ CAST_CL(_values)->unmap(queue);
+ CAST_CL(_output)->unmap(queue);
+ CAST_CL(_hits)->unmap(queue);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h
new file mode 100644
index 000000000..ba9d2ec0d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_HASHTABLE_LOOKUP_H__
+#define __SIMPLE_HASHTABLE_LOOKUP_H__
+
+#include "internal/arm_compute.h"
+
+class SimpleHashtableLookupLayer : public ::arm_compute::IFunction
+{
+public:
+ SimpleHashtableLookupLayer(void)
+ : _lookups(nullptr), _keys(nullptr), _values(nullptr), _output(nullptr), _hits(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *keys,
+ ::arm_compute::ITensor *values, ::arm_compute::ITensor *output,
+ ::arm_compute::ITensor *hits);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_lookups;
+ ::arm_compute::ITensor *_keys;
+ ::arm_compute::ITensor *_values;
+ ::arm_compute::ITensor *_output;
+ ::arm_compute::ITensor *_hits;
+ std::vector<int32_t> _lookup_indices;
+};
+
+#endif /*__SIMPLE_HASHTABLE_LOOKUP_H__ */
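A sketch of the five-tensor configure() call declared above, assuming pre-allocated tensors created elsewhere; per the lookup code in the .cc, lookups and keys are 1-D S32 tensors, output matches values except along its first dimension, and hits holds one byte flag per lookup:

    SimpleHashtableLookupLayer lookup;
    lookup.configure(&lookups, &keys, &values, &output, &hits);
    lookup.run(); // rows of values whose key matches a lookup are copied to output; hits records the matches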
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc
new file mode 100644
index 000000000..d3943ad40
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleNeg.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleNeg::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void SimpleNeg::run()
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_input)->map(queue);
+ CAST_CL(_output)->map(queue);
+ }
+
+ arm_compute::Window window;
+ window.use_tensor_dimensions(_output->info()->tensor_shape());
+
+ execute_window_loop(window, [this](const arm_compute::Coordinates &id) {
+ // NOTE The output tensor must have the same data type as the input tensor,
+ // since each element is negated and written back with the same representation.
+ assert(_input->info()->data_type() == _output->info()->data_type());
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::F32:
+ {
+ const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id));
+ *reinterpret_cast<float *>(_output->ptr_to_element(id)) = -input_value;
+ break;
+ }
+ case ::arm_compute::DataType::S32:
+ {
+ const auto input_value = *reinterpret_cast<int32_t *>(_input->ptr_to_element(id));
+ *reinterpret_cast<int32_t *>(_output->ptr_to_element(id)) = -input_value;
+ break;
+ }
+ case ::arm_compute::DataType::U32:
+ {
+ const auto input_value = *reinterpret_cast<uint32_t *>(_input->ptr_to_element(id));
+ *reinterpret_cast<uint32_t *>(_output->ptr_to_element(id)) = -input_value;
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+ });
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_input)->unmap(queue);
+ CAST_CL(_output)->unmap(queue);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h
index cb3f36337..4ca88e7f8 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h
@@ -1,41 +1,39 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __PAD_LAYER_H__
-#define __PAD_LAYER_H__
-
-#include <arm_compute/runtime/CL/CLTensor.h>
-#include <arm_compute/runtime/CL/functions/CLFillBorder.h>
-
-class PadLayer : public ::arm_compute::IFunction
-{
-public:
- void configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,
- unsigned int border_width);
- void run(void) override;
-
-private:
- ::arm_compute::ICLTensor *_input;
- ::arm_compute::ICLTensor *_output;
- int _border_width;
- int _output_height;
- int _output_width;
-
- ::arm_compute::CLFillBorder _fillborderkernel;
- void populateOutput();
-};
-
-#endif // __PAD_LAYER_H__
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_NEG_H__
+#define __SIMPLE_NEG_H__
+
+#include "internal/arm_compute.h"
+
+class SimpleNeg : public ::arm_compute::IFunction
+{
+public:
+ SimpleNeg(void) : _input(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+};
+
+#endif /*__SIMPLE_NEG_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc
new file mode 100644
index 000000000..2a0a25f0c
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/arm_compute.h"
+#include "SimplePackLayer.h"
+
+void SimplePackLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_vector,
+ ::arm_compute::ICLTensor *output, int32_t axis)
+{
+ uint32_t nr_inputs = input_vector.size();
+ uint32_t output_rank = output->info()->num_dimensions();
+ const ::arm_compute::PermutationVector pv{1, 2, 0};
+ _cl_permuted_vector.resize(nr_inputs);
+ _cl_permute_vector.resize(nr_inputs);
+
+ _output = output;
+ // A negative axis implies axis from the end.
+ // For example, axis = -1 implies the first axis from the end, i.e. axis = Rank - 1.
+  // Similarly, axis = -2 implies the second axis from the end, i.e. axis = Rank - 2.
+ if (axis < 0)
+ {
+ axis += output_rank;
+ }
+ _axis = ToARMComputeAxis(output_rank, axis).value();
+ _cl_reshape_vector.resize(nr_inputs);
+
+ ::arm_compute::TensorShape subTensor_shape{};
+ for (int i = 0; i < output_rank; i++)
+ {
+ if (i != _axis)
+ {
+ subTensor_shape.set(i, _output->info()->tensor_shape()[i]);
+ }
+ else
+ {
+ subTensor_shape.set(i, 1);
+ }
+ }
+
+ auto subTensor_offset = ::arm_compute::Coordinates{};
+ subTensor_offset.set_num_dimensions(output_rank);
+
+ for (int i = 0; i < input_vector.size(); i++)
+ {
+ _input_vector.push_back(input_vector[i]);
+ subTensor_offset[_axis] = i;
+ auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>(
+ CAST_CL(_output), subTensor_shape, subTensor_offset, true);
+ _sub_tensor_vector.push_back(temp_tensor);
+    // Configure each input to be permuted and reshaped into its sub-tensor at the given
+    // offset; the dimension expansion is handled automatically.
+ _cl_permute_vector[i].configure(CAST_CL(_input_vector[i]), &_cl_permuted_vector[i], pv);
+ _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], _sub_tensor_vector[i].get());
+ _cl_permuted_vector[i].allocator()->allocate();
+ }
+}
+
+void SimplePackLayer::run(void)
+{
+ for (int i = 0; i < _input_vector.size(); i++)
+ {
+ _cl_permute_vector[i].run();
+ _cl_reshape_vector[i].run();
+ }
+}
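A worked example of the axis arithmetic in configure() above, under the assumption that ToARMComputeAxis(rank, axis) reverses the dimension order (i.e. returns rank - axis - 1); that assumption is stated here rather than taken from this diff.

// Illustrative only; the ToARMComputeAxis behaviour is an assumption noted above.
#include <cstdint>

inline uint32_t pack_axis_example(uint32_t output_rank, int32_t axis)
{
  if (axis < 0)
    axis += output_rank;           // e.g. rank 4, axis -1 -> 3 (last NNAPI axis)
  return output_rank - axis - 1;   // assumed ToARMComputeAxis: -> 0 (innermost ACL dim)
}
// Each input is then reshaped into a sub-tensor of extent 1 along that ACL axis.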
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h
new file mode 100644
index 000000000..2c2fc37f2
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __SIMPLE_PACK_LAYER_H__
+#define __SIMPLE_PACK_LAYER_H__
+
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/CLSubTensor.h>
+#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+class SimplePackLayer : public ::arm_compute::IFunction
+{
+public:
+ SimplePackLayer(void)
+ : _cl_permuted_vector{}, _input_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{},
+ _cl_permute_vector{}, _output(nullptr), _axis(0)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void configure(const std::vector<::arm_compute::ICLTensor *> &input_vector,
+ ::arm_compute::ICLTensor *output, int axis);
+
+public:
+ void run(void) override;
+
+private:
+ std::vector<::arm_compute::CLTensor> _cl_permuted_vector;
+ std::vector<::arm_compute::ICLTensor *> _input_vector;
+ std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector;
+ std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector;
+ std::vector<::arm_compute::CLPermute> _cl_permute_vector;
+ ::arm_compute::ICLTensor *_output;
+ int _axis;
+};
+
+#endif // __SIMPLE_PACK_LAYER_H__
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc
new file mode 100644
index 000000000..64236603f
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimplePadLayer.h"
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+namespace
+{
+bool validate_arg(const ::arm_compute::ITensor *input, const ::arm_compute::ITensor *output,
+ const ::arm_compute::ITensor *padding_size,
+ const ::arm_compute::Coordinates &axises)
+{
+ const int input_batch = input->info()->tensor_shape()[axises[0]];
+ const int input_height = input->info()->tensor_shape()[axises[1]];
+ const int input_width = input->info()->tensor_shape()[axises[2]];
+ const int input_depth = input->info()->tensor_shape()[axises[3]];
+
+ const int output_batch = output->info()->tensor_shape()[axises[0]];
+ const int output_height = output->info()->tensor_shape()[axises[1]];
+ const int output_width = output->info()->tensor_shape()[axises[2]];
+ const int output_depth = output->info()->tensor_shape()[axises[3]];
+
+ auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0}));
+ auto pad_batch_down = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 0}));
+ auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1}));
+ auto pad_height_bottom = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 1}));
+ auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2}));
+ auto pad_width_right = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 2}));
+ auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3}));
+ auto pad_depth_back = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 3}));
+
+ const int padded_batch = input_batch + pad_batch_up + pad_batch_down;
+ const int padded_height = input_height + pad_height_top + pad_height_bottom;
+ const int padded_width = input_width + pad_width_left + pad_width_right;
+ const int padded_depth = input_depth + pad_depth_front + pad_depth_back;
+
+ return (padded_batch == output_batch) && (padded_height == output_height) &&
+ (padded_width == output_width) && (padded_depth == output_depth);
+}
+} // namespace
+
+void SimplePadLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ ::arm_compute::ITensor *padding_size,
+ const ::arm_compute::Coordinates &axises)
+{
+
+ const auto rank = axises.num_dimensions();
+ assert(rank == 4);
+ assert(input != nullptr && output != nullptr && padding_size != nullptr);
+
+ for (int i = 0; i < rank; ++i)
+ {
+ assert(axises[i] >= 0);
+ assert(axises[i] < rank);
+ }
+
+ _input = input;
+ _output = output;
+ _padding_size = padding_size;
+ _axises = axises;
+}
+
+template <typename T>
+inline void ApplyPadding(const ::arm_compute::ITensor *input_data,
+ const ::arm_compute::TensorShape &input_shape,
+ const ::arm_compute::ITensor *padding_size,
+ ::arm_compute::ITensor *output_data,
+ const ::arm_compute::TensorShape &output_shape,
+ const ::arm_compute::Coordinates &axises, T zero_value)
+{
+
+ assert(validate_arg(input_data, output_data, padding_size, axises) &&
+ "Padded Input shape does not match to output shape");
+
+ const int input_batch = input_shape[axises[0]];
+ const int input_height = input_shape[axises[1]];
+ const int input_width = input_shape[axises[2]];
+ const int input_depth = input_shape[axises[3]];
+
+ const int output_batch = output_shape[axises[0]];
+ const int output_height = output_shape[axises[1]];
+ const int output_width = output_shape[axises[2]];
+ const int output_depth = output_shape[axises[3]];
+
+ // Padding size for Up, Top, Left and Front are required.
+ auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0}));
+ auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1}));
+ auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2}));
+ auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3}));
+
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ for (int out_d = 0; out_d < output_depth; ++out_d)
+ {
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
+
+ if (out_b < pad_batch_up || out_b >= (input_batch + pad_batch_up) ||
+ out_h < pad_height_top || out_h >= (input_height + pad_height_top) ||
+ out_w < pad_width_left || out_w >= (input_width + pad_width_left) ||
+ out_d < pad_depth_front || out_d >= (input_depth + pad_depth_front))
+ {
+ *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = zero_value;
+ }
+ else
+ {
+ auto input_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b - pad_batch_up, out_h - pad_height_top,
+ out_w - pad_width_left, out_d - pad_depth_front},
+ axises);
+ *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input_data->ptr_to_element(input_id));
+ }
+ }
+ }
+ }
+ }
+}
+void SimplePadLayer::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_output)->map(q);
+ CAST_CL(_padding_size)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::U8:
+ case ::arm_compute::DataType::QASYMM8:
+ ApplyPadding<uint8_t>(_input, _input->info()->tensor_shape(), _padding_size, _output,
+ _output->info()->tensor_shape(), _axises,
+ _input->info()->quantization_info().offset);
+ break;
+ case ::arm_compute::DataType::F32:
+ ApplyPadding<float>(_input, _input->info()->tensor_shape(), _padding_size, _output,
+ _output->info()->tensor_shape(), _axises, 0.0f);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ CAST_CL(_padding_size)->unmap(q);
+ }
+}
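To make the padding_size operand explicit: the reads above take the value at coordinate {0, i} as the "before" pad and {1, i} as the "after" pad of logical dimension i (batch, height, width, depth). A small illustrative example:

#include <cstdint>

// Pad height by 1 on each side and width by 2 on each side (NHWC example).
// Row i corresponds to logical dimension i; columns 0/1 hold the before/after
// padding, matching the {0, i} / {1, i} reads in ApplyPadding above.
const int32_t padding_size_example[4][2] = {
    {0, 0}, // batch : up / down
    {1, 1}, // height: top / bottom
    {2, 2}, // width : left / right
    {0, 0}, // depth : front / back
};
// padded_height = input_height + 1 + 1, padded_width = input_width + 2 + 2.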
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h
new file mode 100644
index 000000000..8cb6659ce
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_PAD_LAYER_H__
+#define __SIMPLE_PAD_LAYER_H__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimplePadLayer : public ::arm_compute::IFunction
+{
+public:
+ SimplePadLayer(void) : _input(nullptr), _output(nullptr), _padding_size(nullptr), _axises{}
+ {
+ // DO NOTHING
+ }
+
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ ::arm_compute::ITensor *padding_size,
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+
+ void run(void) override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+ ::arm_compute::ITensor *_padding_size;
+ ::arm_compute::Coordinates _axises;
+};
+
+#endif // __SIMPLE_PAD_LAYER_H__
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc
new file mode 100644
index 000000000..b5b3a0950
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleSQRT.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include <cmath> // for std::sqrt
+
+void SimpleSQRT::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void SimpleSQRT::run()
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_input)->map(queue);
+ CAST_CL(_output)->map(queue);
+ }
+
+ arm_compute::Window window;
+ window.use_tensor_dimensions(_output->info()->tensor_shape());
+
+ execute_window_loop(window, [this](const arm_compute::Coordinates &id) {
+    // NOTE The input and output tensors must have identical data types;
+    //      this loop handles only F32 data.
+    assert(_input->info()->data_type() == _output->info()->data_type());
+
+    const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id));
+    *reinterpret_cast<float *>(_output->ptr_to_element(id)) = std::sqrt(input_value);
+ });
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_input)->unmap(queue);
+ CAST_CL(_output)->unmap(queue);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h
new file mode 100644
index 000000000..b05a9e32e
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_SQRT_H__
+#define __SIMPLE_SQRT_H__
+
+#include "internal/arm_compute.h"
+
+class SimpleSQRT : public ::arm_compute::IFunction
+{
+public:
+ SimpleSQRT(void) : _input(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+};
+
+#endif /*__SIMPLE_SQRT_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
new file mode 100644
index 000000000..f53675b99
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleSpaceToBatchND.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input,
+ ::arm_compute::ITensor *block_size,
+ ::arm_compute::ITensor *padding_size,
+ ::arm_compute::ITensor *output)
+{
+ const auto rank = input->info()->num_dimensions();
+ assert(rank == 4);
+
+ _input = input;
+ _block_size = block_size;
+ _padding_size = padding_size;
+ _output = output;
+}
+
+template <typename T>
+inline void
+SpaceToBatchND(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape,
+ const ::arm_compute::ITensor *block_size, const ::arm_compute::ITensor *padding_size,
+ const ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape,
+ T zero_value)
+{
+ const int input_batch = input_shape[3];
+ const int input_height = input_shape[1];
+ const int input_width = input_shape[0];
+
+ const int depth = output_shape[2];
+
+ const int padding_height_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 1}));
+ const int padding_height_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 1}));
+ const int padding_width_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 0}));
+ const int padding_width_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 0}));
+ const int padded_height = input_height + padding_height_left + padding_height_right;
+ const int padded_width = input_width + padding_width_left + padding_width_right;
+
+ const int block_size_height = *reinterpret_cast<int *>(block_size->ptr_to_element({1}));
+ const int block_size_width = *reinterpret_cast<int *>(block_size->ptr_to_element({0}));
+
+ assert(padding_height_left >= 0);
+ assert(padding_height_right >= 0);
+ assert(padding_width_left >= 0);
+ assert(padding_width_right >= 0);
+ assert(block_size_height >= 1);
+ assert(block_size_width >= 1);
+ assert(padded_height % block_size_height == 0);
+ assert(padded_width % block_size_width == 0);
+ assert(output->info()->dimension(3) ==
+ input->info()->dimension(3) * (block_size_height * block_size_width));
+
+ for (int in_b = 0; in_b < input_batch; ++in_b)
+ {
+ for (int in_d = 0; in_d < depth; ++in_d)
+ {
+ for (int in_h = 0; in_h < padded_height; ++in_h)
+ {
+ for (int in_w = 0; in_w < padded_width; ++in_w)
+ {
+ const int out_d = in_d;
+ const int out_h = in_h / block_size_height;
+ const int out_w = in_w / block_size_width;
+ const int out_b =
+ in_b +
+ ((in_h % block_size_height) * block_size_width + in_w % block_size_width) *
+ input_batch;
+
+ const ::arm_compute::Coordinates output_id{out_w, out_h, out_d, out_b};
+
+ if (in_h < padding_height_left || in_h >= (input_height + padding_height_left) ||
+ in_w < padding_width_left || in_w >= (input_width + padding_width_left))
+ {
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = zero_value;
+ }
+ else
+ {
+ const ::arm_compute::Coordinates input_id{in_w - padding_width_left,
+ in_h - padding_height_left, in_d, in_b};
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
+ }
+ }
+ }
+ }
+ }
+}
+void SimpleSpaceToBatchND::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_block_size)->map(q);
+ CAST_CL(_padding_size)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::U8:
+ case ::arm_compute::DataType::QASYMM8:
+ SpaceToBatchND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _padding_size,
+ _output, _output->info()->tensor_shape(),
+ _input->info()->quantization_info().offset);
+ break;
+ case ::arm_compute::DataType::F32:
+ SpaceToBatchND<float>(_input, _input->info()->tensor_shape(), _block_size, _padding_size,
+ _output, _output->info()->tensor_shape(), 0.0f);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_block_size)->unmap(q);
+ CAST_CL(_padding_size)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
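A worked shape example for the loop above, stated in logical NHWC terms (the code itself indexes the ACL {W, H, C, N} layout):

// Input (N, H, W, C) = (1, 4, 4, 3), block_size = {2, 2}, zero padding:
//   padded_height = 4, padded_width = 4
//   output_height = padded_height / block_size_height = 2
//   output_width  = padded_width  / block_size_width  = 2
//   output_batch  = input_batch * 2 * 2 = 4   (checked by the assert on dimension(3))
// Output (N, H, W, C) = (4, 2, 2, 3); padded positions, if any, receive zero_value.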
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h
new file mode 100644
index 000000000..4af961d34
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_SPACE_TO_BATCHND_H__
+#define __SIMPLE_SPACE_TO_BATCHND_H__
+
+#include "internal/arm_compute.h"
+
+class SimpleSpaceToBatchND : public ::arm_compute::IFunction
+{
+public:
+ SimpleSpaceToBatchND(void)
+ : _input(nullptr), _block_size(nullptr), _padding_size(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ /** Initialise input and output
+ *
+ * @param[in] input First tensor input.
+ * @param[in] block_size Block size.
+ * @param[in] padding_size Padding size.
+ * @param[out] output Output tensor.
+ */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *block_size,
+ ::arm_compute::ITensor *padding_size, ::arm_compute::ITensor *output);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_block_size;
+ ::arm_compute::ITensor *_padding_size;
+ ::arm_compute::ITensor *_output;
+};
+
+#endif /*__SIMPLE_SPACE_TO_BATCHND_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc
index 682295f81..3519da1f3 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc
@@ -19,11 +19,8 @@
#include <arm_compute/runtime/CL/CLScheduler.h>
void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
- int32_t block_size,
- const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+ int32_t block_size, const ::arm_compute::Coordinates &axises)
{
- assert(input->info()->num_dimensions() == 4);
- assert(output->info()->num_dimensions() == 4);
const auto rank = axises.num_dimensions();
assert(rank == 4);
for (int i = 0; i < rank; ++i)
@@ -38,26 +35,10 @@ void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute:
_axises = axises;
}
-inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
- int32_t d, const ::arm_compute::Coordinates &axises)
-{
- // b, h, w, d >= 0
- size_t indexes[4];
- indexes[axises[0]] = b;
- indexes[axises[1]] = h;
- indexes[axises[2]] = w;
- indexes[axises[3]] = d;
-
- int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
- offset += indexes[2] * shape[1] * shape[0];
- offset += indexes[1] * shape[0];
- offset += indexes[0];
- return offset;
-}
-
template <typename T>
-inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &input_shape,
- int32_t block_size, T *output_data,
+inline void SpaceToDepth(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape, int32_t block_size,
+ ::arm_compute::ITensor *output,
const ::arm_compute::TensorShape &output_shape,
const ::arm_compute::Coordinates &axises)
{
@@ -66,16 +47,6 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &
const int input_width = input_shape[axises[2]];
const int input_depth = input_shape[axises[3]];
- const int output_batch = output_shape[axises[0]];
- const int output_height = output_shape[axises[1]];
- const int output_width = output_shape[axises[2]];
- const int output_depth = output_shape[axises[3]];
-
- assert(input_batch == output_batch);
- assert(input_height == output_height * block_size);
- assert(input_width == output_width * block_size);
- assert(input_depth * block_size * block_size == output_depth);
-
for (int in_b = 0; in_b < input_batch; ++in_b)
{
for (int in_h = 0; in_h < input_height; ++in_h)
@@ -90,10 +61,13 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &
const int out_d =
in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth;
- const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises);
- const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+ auto input_id =
+ asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
- output_data[output_index] = input_data[input_index];
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
}
}
}
@@ -110,35 +84,16 @@ void SimpleSpaceToDepth::run()
CAST_CL(_output)->map(q);
}
- auto input_buf = _input->buffer();
- auto output_buf = _output->buffer();
switch (_input->info()->data_type())
{
case ::arm_compute::DataType::U8:
case ::arm_compute::DataType::QASYMM8:
- SpaceToDepth(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<uint8_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
- break;
- case ::arm_compute::DataType::S8:
- SpaceToDepth(reinterpret_cast<const int8_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<int8_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
- break;
- case ::arm_compute::DataType::U32:
- SpaceToDepth(reinterpret_cast<const uint32_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<uint32_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
- break;
- case ::arm_compute::DataType::S32:
- SpaceToDepth(reinterpret_cast<const int32_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<int32_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ SpaceToDepth<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
break;
case ::arm_compute::DataType::F32:
- SpaceToDepth(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<float *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ SpaceToDepth<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
break;
default:
ARM_COMPUTE_ERROR("DataType not supported");
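For reference, the depth index computed in the rewritten loop works out as follows for block_size = 2 and input_depth = 1 (illustrative):

// out_d = in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth
//   (in_h % 2, in_w % 2) = (0, 0) -> out_d = 0
//                          (0, 1) -> out_d = 1
//                          (1, 0) -> out_d = 2
//                          (1, 1) -> out_d = 3
// i.e. each 2x2 spatial patch is placed contiguously along the output depth,
// while out_h = in_h / 2 and out_w = in_w / 2 pick the patch position.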
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h
index f5e028b1c..9e87c364c 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h
@@ -14,25 +14,44 @@
* limitations under the License.
*/
+/**
+ * @file        SimpleSpaceToDepth.h
+ * @brief       This file contains SimpleSpaceToDepth class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __SIMPLE_SPACE_TO_DEPTH_H__
#define __SIMPLE_SPACE_TO_DEPTH_H__
#include "internal/arm_compute.h"
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "internal/arm_compute/Cast.h"
+/**
+ * @brief Class to run SimpleSpaceToDepth Layer
+ */
class SimpleSpaceToDepth : public ::arm_compute::IFunction
{
public:
- /** Initialise input and output
- *
- * @param[in] input First tensor input.
- * @param[out] output Output tensor.
- * @param[in] block_size Block size.
+ SimpleSpaceToDepth(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{}
+ {
+ // DO NOTHING
+ }
+
+ /**
+ * @brief Configure the layer
+ * @param[in] input First tensor input.
+   * @param[out] output      Output tensor.
+ * @param[in] block_size Block size.
+ * @param[in] axises Axises of rank 4
+ * @return N/A
*/
void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
- const ::arm_compute::Coordinates &axises);
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run() override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc
new file mode 100644
index 000000000..abc291289
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleTransposeConv.h"
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleTransposeConv::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights,
+ ::arm_compute::ITensor *output,
+ ::arm_compute::PadStrideInfo &tconv_info,
+ ::arm_compute::Coordinates axises)
+{
+ auto rank = axises.num_dimensions();
+
+ assert(rank == 4);
+
+ _input = input;
+ _weights = weights;
+ _output = output;
+ _stride_width = tconv_info.stride().first;
+ _stride_height = tconv_info.stride().second;
+ _pad_width = tconv_info.pad_left();
+ _pad_height = tconv_info.pad_top();
+ _axises = axises;
+}
+
+template <typename T>
+inline void ApplyTransposeConv(
+ const ::arm_compute::TensorShape &input_shape, const ::arm_compute::ITensor *input_data,
+ const ::arm_compute::TensorShape &filter_shape, const ::arm_compute::ITensor *filter_data,
+ const ::arm_compute::TensorShape &output_shape, const ::arm_compute::ITensor *output_data,
+ const int32_t stride_width, const int32_t stride_height, const int32_t pad_width,
+ const int32_t pad_height, const ::arm_compute::Coordinates axises)
+{
+ const int batches = input_shape[axises[0]];
+ const int input_height = input_shape[axises[1]];
+ const int input_width = input_shape[axises[2]];
+ const int input_depth = input_shape[axises[3]];
+
+ const int filter_height = filter_shape[axises[1]];
+ const int filter_width = filter_shape[axises[2]];
+
+ const int output_height = output_shape[axises[1]];
+ const int output_width = output_shape[axises[2]];
+ const int output_depth = output_shape[axises[3]];
+
+ assert(batches == output_shape[axises[0]]);
+ assert(input_depth == filter_shape[axises[3]]);
+ assert(filter_shape[axises[0]] == output_depth);
+
+ // Although transpose convolution simplifies to convolution with transposed
+ // weights for strides of 1, non-unitary striding complicates matters. To
+ // keep this reference implementation as clear as possible, we use a
+ // "scatter" access pattern, where we loop through all the input elements,
+ // computing their influence on the output, rather than looping through the
+ // output elements in the typical "gather" access pattern of a conv. We
+ // therefore must initialize the output array to zero.
+
+ // Loop through input elements one at a time.
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ // Loop through the output elements it will influence
+ const int out_x_origin = (in_x * stride_width) - pad_width;
+ const int out_y_origin = (in_y * stride_height) - pad_height;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ // Compute output element location
+ const int out_x = out_x_origin + filter_x;
+ const int out_y = out_y_origin + filter_y;
+ // We cannot accumulate out of bounds
+ if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
+ (out_y < output_height))
+ {
+ auto input_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{batch, in_y, in_x, in_channel}, axises);
+ auto filter_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{in_channel, filter_y, filter_x, out_channel},
+ axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{batch, out_y, out_x, out_channel}, axises);
+ T input_value = *reinterpret_cast<T *>(input_data->ptr_to_element(input_id));
+ T filter_value = *reinterpret_cast<T *>(filter_data->ptr_to_element(filter_id));
+ *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) +=
+ input_value * filter_value;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void SimpleTransposeConv::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_weights)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::S32:
+ ApplyTransposeConv<int32_t>(_input->info()->tensor_shape(), _input,
+ _weights->info()->tensor_shape(), _weights,
+ _output->info()->tensor_shape(), _output, _stride_width,
+ _stride_height, _pad_width, _pad_height, _axises);
+ break;
+ case ::arm_compute::DataType::F32:
+ ApplyTransposeConv<float>(_input->info()->tensor_shape(), _input,
+ _weights->info()->tensor_shape(), _weights,
+ _output->info()->tensor_shape(), _output, _stride_width,
+ _stride_height, _pad_width, _pad_height, _axises);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_weights)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
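Because the scatter loop above accumulates with +=, the output tensor must already contain zeros when run() starts; the comment in the loop notes this, and where the zero-fill actually happens is outside this diff. A hedged sketch of such a pre-pass, assuming the tensor buffer is host-accessible (e.g. a mapped CL tensor):

#include <arm_compute/core/ITensor.h>
#include <cstring>

// Illustrative zero-fill; assumes the buffer is host-accessible and that
// info()->total_size() covers the whole allocation.
inline void zero_fill(::arm_compute::ITensor *t)
{
  std::memset(t->buffer(), 0, t->info()->total_size());
}
// For reference (not enforced by this layer), the usual shape relation is:
//   output_size = (input_size - 1) * stride + filter_size - 2 * pad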
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h
new file mode 100644
index 000000000..c5519828b
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TRANSPOSE_CONV_EX__
+#define __TRANSPOSE_CONV_EX__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimpleTransposeConv : public ::arm_compute::IFunction
+{
+public:
+ SimpleTransposeConv()
+ : _input(nullptr), _weights(nullptr), _output(nullptr), _stride_width(0), _stride_height(0),
+ _pad_width(0), _pad_height(0)
+ {
+ // DO NOTHING
+ }
+
+ /** Initialise input and output
+ *
+ * @param[in] input First tensor input.
+ * @param[in] weights Weights
+ * @param[out] output Output tensor.
+   * @param[in]  tconv_info  Contains padding and policies to be used in the deconvolution,
+   *                         as described in @ref PadStrideInfo.
+ * @param[in] axises Axises of rank 4
+ */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights,
+ ::arm_compute::ITensor *output, ::arm_compute::PadStrideInfo &tconv_info,
+ ::arm_compute::Coordinates axises = getARMComputeAxises(4));
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_weights;
+ ::arm_compute::ITensor *_output;
+ int32_t _stride_width;
+ int32_t _stride_height;
+ int32_t _pad_width;
+ int32_t _pad_height;
+ ::arm_compute::Coordinates _axises;
+};
+
+#endif /*__TRANSPOSE_CONV_EX__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc
new file mode 100644
index 000000000..910595a44
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/arm_compute.h"
+#include "SimpleUnpackLayer.h"
+
+void SimpleUnpackLayer::configure(::arm_compute::ICLTensor *input,
+ const std::vector<::arm_compute::ICLTensor *> &output_vector,
+ int32_t axis)
+{
+ uint32_t nr_outputs = output_vector.size();
+ _cl_permuted_vector.resize(nr_outputs);
+ _cl_permute_vector.resize(nr_outputs);
+ uint32_t input_rank = input->info()->num_dimensions();
+ const ::arm_compute::PermutationVector pv{2, 0, 1};
+ _input = input;
+  // Negative axis is supported: -1 implies axis R-1, where R is the input rank.
+ if (axis < 0)
+ {
+ axis += input_rank;
+ }
+ _axis = ToARMComputeAxis(input_rank, axis).value();
+ _cl_reshape_vector.resize(nr_outputs);
+
+ ::arm_compute::TensorShape subTensor_shape{};
+ for (int i = 0; i < input_rank; i++)
+ {
+ if (i != _axis)
+ {
+ subTensor_shape.set(i, _input->info()->tensor_shape()[i]);
+ }
+ else
+ {
+ subTensor_shape.set(i, 1);
+ }
+ }
+
+ auto subTensor_offset = ::arm_compute::Coordinates{};
+ subTensor_offset.set_num_dimensions(input_rank);
+
+ for (int i = 0; i < output_vector.size(); i++)
+ {
+ _output_vector.push_back(output_vector[i]);
+ subTensor_offset[_axis] = i;
+ auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>(
+ CAST_CL(_input), subTensor_shape, subTensor_offset, true);
+ _sub_tensor_vector.push_back(temp_tensor);
+    // Copy each slice out of the sub-tensor view: permute it, then reshape into the output
+ _cl_permute_vector[i].configure(_sub_tensor_vector[i].get(), &_cl_permuted_vector[i], pv);
+ _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], CAST_CL(_output_vector[i]));
+ _cl_permuted_vector[i].allocator()->allocate();
+ }
+}
+
+void SimpleUnpackLayer::run(void)
+{
+ for (int i = 0; i < _output_vector.size(); i++)
+ {
+ _cl_permute_vector[i].run();
+ _cl_reshape_vector[i].run();
+ }
+}
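A worked example of the sub-tensor construction above, stated for an ACL input shape of {W, H, C, N} with N = 3 and the unpack axis resolving to the batch dimension (illustrative):

// subTensor_shape = {W, H, C, 1}; the i-th sub-tensor sits at offset {0, 0, 0, i}.
// Each sub-tensor view is permuted with pv = {2, 0, 1} and then reshaped into
// output_vector[i], whose shape drops the unpacked dimension, so three rank-3
// outputs are produced from one rank-4 input.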
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h
new file mode 100644
index 000000000..52fc7513d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __UNPACK_LAYER_H__
+#define __UNPACK_LAYER_H__
+
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/CLSubTensor.h>
+#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+class SimpleUnpackLayer : public ::arm_compute::IFunction
+{
+public:
+ SimpleUnpackLayer(void)
+ : _cl_permuted_vector{}, _output_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{},
+ _cl_permute_vector{}, _input(nullptr), _axis(0)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void configure(::arm_compute::ICLTensor *input,
+ const std::vector<::arm_compute::ICLTensor *> &output_vector, int32_t axis);
+
+public:
+ void run(void) override;
+
+private:
+ std::vector<::arm_compute::CLTensor> _cl_permuted_vector;
+ std::vector<::arm_compute::ICLTensor *> _output_vector;
+ std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector;
+ std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector;
+ std::vector<::arm_compute::CLPermute> _cl_permute_vector;
+ ::arm_compute::ICLTensor *_input;
+ int32_t _axis;
+};
+
+#endif // __UNPACK_LAYER_H__
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc
deleted file mode 100644
index 3f988a819..000000000
--- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "SquaredDifferenceOperation.h"
-#include "internal/arm_compute.h"
-
-void SquaredDifferenceOperation::configure(::arm_compute::ITensor *input1,
- ::arm_compute::ITensor *input2,
- ::arm_compute::ITensor *output,
- ::arm_compute::ConvertPolicy ConvertPolicy, float scale,
- ::arm_compute::RoundingPolicy RoundingPolicy)
-{
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- if (::internal::arm_compute::isGpuMode())
- {
- _cl_sub.configure(CAST_CL(input1), CAST_CL(input2), CAST_CL(output), ConvertPolicy);
- _cl_mul.configure(CAST_CL(output), CAST_CL(output), CAST_CL(output), scale, ConvertPolicy,
- RoundingPolicy);
- }
- else
- {
- _neon_sub.configure(CAST_NE(input1), CAST_NE(input2), CAST_NE(output), ConvertPolicy);
- _neon_mul.configure(CAST_NE(output), CAST_NE(output), CAST_NE(output), scale, ConvertPolicy,
- RoundingPolicy);
- }
-}
-
-void SquaredDifferenceOperation::run(void)
-{
- if (::internal::arm_compute::isGpuMode())
- {
- _cl_sub.run();
- _cl_mul.run();
- }
- else
- {
- _neon_sub.run();
- _neon_mul.run();
- }
-}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h
deleted file mode 100644
index 3782c4e8c..000000000
--- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef __SQUARED_DIFFERENCE_OPERATION_H__
-#define __SQUARED_DIFFERENCE_OPERATION_H__
-
-#include <arm_compute/runtime/Tensor.h>
-#include <arm_compute/runtime/CL/CLTensor.h>
-
-#include <arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h>
-#include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h>
-#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
-#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
-
-class SquaredDifferenceOperation : public ::arm_compute::IFunction
-{
-public:
- void configure(::arm_compute::ITensor *input1, ::arm_compute::ITensor *input2,
- ::arm_compute::ITensor *output, ::arm_compute::ConvertPolicy ConvertPolicy,
- float scale, ::arm_compute::RoundingPolicy RoundingPolicy);
-
-public:
- void run(void) override;
-
-private:
- ::arm_compute::ITensor *_input1;
- ::arm_compute::ITensor *_input2;
-
- ::arm_compute::ITensor *_output;
-
-private:
- ::arm_compute::CLArithmeticSubtraction _cl_sub;
- ::arm_compute::CLPixelWiseMultiplication _cl_mul;
-
- ::arm_compute::NEArithmeticSubtraction _neon_sub;
- ::arm_compute::NEPixelWiseMultiplication _neon_mul;
-};
-#endif // __SQUARED_DIFFERENCE_OPERATION_H__