path: root/runtimes/neurun/src/kernel
Diffstat (limited to 'runtimes/neurun/src/kernel')
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/CLFunction.h                    |  55
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt                  |   4
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc                  |  27
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h                   |  20
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc |  94
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h  |  67
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc   |  94
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h    |  67
-rw-r--r--  runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc                    |  38
-rw-r--r--  runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h                     |   4
-rw-r--r--  runtimes/neurun/src/kernel/cpu/CMakeLists.txt                     |   2
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConcatLayer.cc                     |  63
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConcatLayer.h                      |   4
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc                | 146
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h                 |   4
-rw-r--r--  runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc             |  77
-rw-r--r--  runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h              |   4
-rw-r--r--  runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc                    |  37
-rw-r--r--  runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h                     |   4
-rw-r--r--  runtimes/neurun/src/kernel/cpu/OperationUtils.cc                  |   2
-rw-r--r--  runtimes/neurun/src/kernel/cpu/OperationUtils.h                   |  55
-rw-r--r--  runtimes/neurun/src/kernel/cpu/PermuteLayer.cc                    | 201
-rw-r--r--  runtimes/neurun/src/kernel/cpu/PermuteLayer.h                     |  58
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ReshapeLayer.h                     |   4
-rw-r--r--  runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc                    |  72
-rw-r--r--  runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h                     |   4
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc    |  90
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h     |  67
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc      |  90
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h       |  67
30 files changed, 660 insertions(+), 861 deletions(-)
diff --git a/runtimes/neurun/src/kernel/acl_cl/CLFunction.h b/runtimes/neurun/src/kernel/acl_cl/CLFunction.h
new file mode 100644
index 000000000..f34210c8a
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/CLFunction.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_ACL_CL_CL_FUNCTION_H__
+#define __NEURUN_KERNEL_ACL_CL_CL_FUNCTION_H__
+
+#include "exec/interface/IFunction.h"
+#include <arm_compute/runtime/IFunction.h>
+#include <memory>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+class CLFunction : public ::neurun::exec::IFunction
+{
+public:
+ CLFunction() = delete;
+
+public:
+ CLFunction(std::unique_ptr<::arm_compute::IFunction> &&func)
+ : _func(std::forward<std::unique_ptr<::arm_compute::IFunction>>(func))
+ {
+ // DO NOTHING
+ }
+
+public:
+ void run() override { _func->run(); }
+ void prepare() override { _func->prepare(); }
+
+private:
+ std::unique_ptr<::arm_compute::IFunction> _func;
+};
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_ACL_CL_CL_FUNCTION_H__
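
CLFunction is a thin adapter: it takes ownership of an arm_compute::IFunction and exposes it through neurun's exec::IFunction interface, so ACL-backed kernels and CPU kernels can be scheduled uniformly. A minimal wiring sketch (CLReshapeLayer and the pre-configured tensors are illustrative assumptions, not part of this patch):

    #include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
    #include <memory>
    #include "CLFunction.h"

    void runViaCLFunction(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output)
    {
      auto acl_fn = std::make_unique<::arm_compute::CLReshapeLayer>();
      acl_fn->configure(input, output); // ACL-side setup of the wrapped function
      ::neurun::kernel::acl_cl::CLFunction fn{std::move(acl_fn)};
      fn.prepare(); // forwards to arm_compute::IFunction::prepare()
      fn.run();     // forwards to arm_compute::IFunction::run()
    }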
diff --git a/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt
index 857fe6fe6..0658effea 100644
--- a/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt
+++ b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt
@@ -4,11 +4,9 @@ add_library(${LIB_NEURUN_KERNEL_ACL_CL} STATIC ${SOURCES})
target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NNFW_INCLUDE_DIR})
target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NEURUN_INCLUDE_DIR})
-target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) # TODO We should not need this
target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} arm_compute)
-target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} tensorflow-lite) # TODO We should not need this
-target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} ${LIB_NEURUN_KERNEL_CPU}) # TODO We should not need this
+target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} nnfw_lib_misc)
set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES OUTPUT_NAME kernel_acl_cl)
diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc
index b75ac90f0..3844317ab 100644
--- a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc
+++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc
@@ -18,23 +18,23 @@
#include <arm_compute/runtime/CL/CLScheduler.h>
-#include "backend/acl_cl/kernel/View.h"
-#include "logging.h"
+#include "util/feature/nchw/View.h"
+#include "util/logging.h"
namespace
{
-bool matchSizeExceptAxis(const ::arm_compute::ICLTensor *t1, const ::arm_compute::ICLTensor *t2,
- uint32_t axis)
+bool matchSizeExceptAxis(const ::neurun::backend::acl_cl::operand::ICLTensor *t1,
+ const ::neurun::backend::acl_cl::operand::ICLTensor *t2, uint32_t axis)
{
- assert(t1->info()->num_dimensions() <= 4);
- assert(t2->info()->num_dimensions() <= 4);
+ assert(t1->num_dimensions() <= 4);
+ assert(t2->num_dimensions() <= 4);
for (uint32_t i = 0; i < 4; i++)
{
if (axis == i)
continue;
- if (t1->info()->dimension(i) != t2->info()->dimension(i))
+ if (t1->dimension(i) != t2->dimension(i))
return false;
}
return true;
@@ -66,10 +66,10 @@ bool ConcatLayer::concatenationFloat32()
for (auto input : _input_allocs)
{
assert(matchSizeExceptAxis(_output_alloc, input, _axis));
- axis_sum += input->info()->dimension(_axis);
+ axis_sum += input->dimension(_axis);
}
- assert(_output_alloc->info()->dimension(_axis) == axis_sum);
+ assert(_output_alloc->dimension(_axis) == axis_sum);
}
VERBOSE(Concat_RUN) << "START Concat" << std::endl;
@@ -81,12 +81,12 @@ bool ConcatLayer::concatenationFloat32()
auto &queue = ::arm_compute::CLScheduler::get().queue();
_output_alloc->map(queue);
- ::internal::arm_compute::kernel::View<float> output_view{_output_alloc};
+ util::feature::nchw::View<float> output_view{_output_alloc};
for (auto input : _input_allocs)
{
input->map(queue);
- const ::internal::arm_compute::kernel::View<float> input_reader{input};
+ const util::feature::nchw::View<float> input_reader{input};
for (uint32_t n = 0; n < input_reader.shape().N; n++)
{
@@ -124,8 +124,9 @@ bool ConcatLayer::concatenationFloat32()
return true;
}
-void ConcatLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs,
- int32_t axis, ::arm_compute::ICLTensor *output_alloc)
+void ConcatLayer::configure(
+ const std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> &input_allocs, int32_t axis,
+ ::neurun::backend::acl_cl::operand::ICLTensor *output_alloc)
{
_input_allocs = input_allocs;
_output_alloc = output_alloc;
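
The float path above concatenates on the host rather than on the device: every ICLTensor is mapped into host memory, copied element by element through NCHW feature views while a running offset tracks the concat axis, then unmapped. Condensed, the access pattern is (a sketch of the code above, not a complete kernel):

    auto &queue = ::arm_compute::CLScheduler::get().queue();
    _output_alloc->map(queue); // make the device buffer host-visible
    util::feature::nchw::View<float> out_view{_output_alloc};
    for (auto input : _input_allocs)
    {
      input->map(queue);
      const util::feature::nchw::View<float> in_view{input};
      // ... copy in_view's N/C/H/W elements into out_view at the running _axis offset ...
      input->unmap(queue);
    }
    _output_alloc->unmap(queue);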
diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h
index 4767721fa..d468a6dfb 100644
--- a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h
+++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h
@@ -14,17 +14,17 @@
* limitations under the License.
*/
-#ifndef __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
-#define __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
+#ifndef __NEURUN_KERNEL_ACL_CL_CONCAT_LAYER_H__
+#define __NEURUN_KERNEL_ACL_CL_CONCAT_LAYER_H__
#include <NeuralNetworks.h>
-#include <arm_compute/core/CL/ICLTensor.h>
#include <arm_compute/runtime/IFunction.h>
-#include "graph/operand/DataType.h"
+#include "model/operand/DataType.h"
+#include "backend/acl_cl/operand/ICLTensor.h"
-using OperandType = neurun::graph::operand::DataType;
+using OperandType = neurun::model::operand::DataType;
namespace neurun
{
@@ -44,9 +44,9 @@ public:
ConcatLayer();
public:
- void configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs,
+ void configure(const std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> &input_allocs,
int32_t axis /* NNAPI tensor axis from NHWC order */,
- ::arm_compute::ICLTensor *output_alloc);
+ ::neurun::backend::acl_cl::operand::ICLTensor *output_alloc);
void run();
@@ -54,8 +54,8 @@ private:
bool concatenationFloat32();
private:
- std::vector<::arm_compute::ICLTensor *> _input_allocs;
- ::arm_compute::ICLTensor *_output_alloc;
+ std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> _input_allocs;
+ ::neurun::backend::acl_cl::operand::ICLTensor *_output_alloc;
int32_t _axis;
OperandType _input_type;
};
@@ -64,4 +64,4 @@ private:
} // namespace kernel
} // namespace neurun
-#endif // __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
+#endif // __NEURUN_KERNEL_ACL_CL_CONCAT_LAYER_H__
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc
deleted file mode 100644
index fa1d77579..000000000
--- a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
-//
-
-#if 0
-
-#include "TensorConvertFromCommonLayer.h"
-
-#include "internal/nnapi/feature/Reader.h"
-#include "backend/acl_cl/feature/View.h"
-
-#include <util/feature/IndexIterator.h>
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-namespace neurun
-{
-namespace kernel
-{
-namespace acl_cl
-{
-
-bool TensorConvertFromCommonLayer::convert()
-{
- auto inputBuffer = _inputTensor->buffer();
- auto inputSize = _inputTensor->info()->total_size();
-
- auto &queue = ::arm_compute::CLScheduler::get().queue();
-
- _outputTensor->map(queue);
-
- if (_tensorShape.rank() == 2)
- {
- const auto len = _tensorShape.dim(1);
-
- auto base = reinterpret_cast<const float *>(inputBuffer);
-
- for (int32_t n = 0; n < len; ++n)
- {
- auto from = base + n;
- auto into =
- reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
-
- *into = *from;
- }
- }
- else if (_tensorShape.rank() == 4)
- {
- auto featureShape = _tensorShape.asFeature();
-
- const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
- ::internal::arm_compute::feature::View<float> into{_outputTensor};
-
- ::nnfw::util::feature::iterate(featureShape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, ch, row, col) = value;
- };
- }
-
- _outputTensor->unmap(queue);
-}
-
-void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
- ::arm_compute::ICLTensor *outputTensor,
- const ::neurun::graph::operand::Shape &tensorShape)
-{
- _inputTensor = inputTensor;
- _outputTensor = outputTensor;
- _tensorShape = tensorShape;
-}
-
-void TensorConvertFromCommonLayer::run() { convert(); }
-
-} // namespace acl_cl
-} // namespace kernel
-} // namespace neurun
-
-#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h
deleted file mode 100644
index bd031a106..000000000
--- a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
-//
-
-#if 0
-
-#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
-#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
-
-#include <NeuralNetworks.h>
-
-#include <arm_compute/runtime/IFunction.h>
-#include <arm_compute/core/CL/ICLTensor.h>
-
-#include "internal/Model.h"
-#include "internal/common/Tensor.h"
-
-namespace neurun
-{
-namespace kernel
-{
-namespace acl_cl
-{
-
-class TensorConvertFromCommonLayer : public ::arm_compute::IFunction
-{
-public:
- TensorConvertFromCommonLayer() {}
-
-public:
- bool convert();
-
- void configure(::internal::common::Tensor *inputTensor, ::arm_compute::ICLTensor *outputTensor,
- const ::neurun::graph::operand::Shape &tensorShape);
-
- void run();
-
-private:
- ::internal::common::Tensor *_inputTensor;
- ::arm_compute::ICLTensor *_outputTensor;
-
- ::neurun::graph::operand::Shape _tensorShape{1};
-};
-
-} // namespace acl_cl
-} // namespace kernel
-} // namespace neurun
-
-#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
-
-#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc
deleted file mode 100644
index 985524bc3..000000000
--- a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
-//
-
-#if 0
-
-#include "TensorConvertToCommonLayer.h"
-
-#include "backend/acl_cl/feature/View.h"
-#include "internal/nnapi/feature/View.h"
-
-#include <util/feature/IndexIterator.h>
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-namespace neurun
-{
-namespace kernel
-{
-namespace acl_cl
-{
-
-bool TensorConvertToCommonLayer::convert()
-{
- auto outputBuffer = _outputTensor->buffer();
- auto outputSize = _outputTensor->info()->total_size();
-
- auto &queue = ::arm_compute::CLScheduler::get().queue();
-
- _inputTensor->map(queue);
-
- if (_tensorShape.rank() == 2)
- {
- const auto len = _tensorShape.dim(1);
-
- auto base = reinterpret_cast<float *>(outputBuffer);
-
- for (int32_t n = 0; n < len; ++n)
- {
- auto from = reinterpret_cast<const float *>(
- _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
- auto into = base + n;
-
- *into = *from;
- }
- }
- else if (_tensorShape.rank() == 4)
- {
- auto featureShape = _tensorShape.asFeature();
-
- const ::internal::arm_compute::feature::View<float> from{_inputTensor};
- ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
-
- ::nnfw::util::feature::iterate(featureShape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, ch, row, col) = value;
- };
- }
-
- _inputTensor->unmap(queue);
-}
-
-void TensorConvertToCommonLayer::configure(::arm_compute::ICLTensor *inputTensor,
- ::internal::common::Tensor *outputTensor,
- const ::neurun::graph::operand::Shape &tensorShape)
-{
- _inputTensor = inputTensor;
- _outputTensor = outputTensor;
- _tensorShape = tensorShape;
-}
-
-void TensorConvertToCommonLayer::run() { convert(); }
-
-} // namespace acl_cl
-} // namespace kernel
-} // namespace neurun
-
-#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h
deleted file mode 100644
index 576f1ee71..000000000
--- a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
-//
-
-#if 0
-
-#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
-#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
-
-#include <NeuralNetworks.h>
-
-#include <arm_compute/runtime/IFunction.h>
-#include <arm_compute/core/CL/ICLTensor.h>
-
-#include "internal/Model.h"
-#include "internal/common/Tensor.h"
-
-namespace neurun
-{
-namespace kernel
-{
-namespace acl_cl
-{
-
-class TensorConvertToCommonLayer : public ::arm_compute::IFunction
-{
-public:
- TensorConvertToCommonLayer() {}
-
-public:
- bool convert();
-
- void configure(::arm_compute::ICLTensor *inputTensor, ::internal::common::Tensor *outputTensor,
- const ::neurun::graph::operand::Shape &tensorShape);
-
- void run();
-
-private:
- ::arm_compute::ICLTensor *_inputTensor;
- ::internal::common::Tensor *_outputTensor;
-
- ::neurun::graph::operand::Shape _tensorShape{1};
-};
-
-} // namespace acl_cl
-} // namespace kernel
-} // namespace neurun
-
-#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
-
-#endif
diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
index 2a6a84e10..f434a6dec 100644
--- a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
@@ -27,14 +27,14 @@ namespace kernel
namespace cpu
{
-#define AVGPOOLING_PARAMETERS \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft;
+#define AVGPOOLING_PARAMETERS \
+ tflite::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
AvgPoolLayer::AvgPoolLayer()
: _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
@@ -47,31 +47,31 @@ AvgPoolLayer::AvgPoolLayer()
bool AvgPoolLayer::averagePoolFloat32()
{
-
AVGPOOLING_PARAMETERS
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
- ::tflite::optimized_ops::AveragePool(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::AveragePool(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
return true;
}
bool AvgPoolLayer::averagePoolQuant8()
{
-
AVGPOOLING_PARAMETERS
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
- ::tflite::optimized_ops::AveragePool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
- _kernelHeight, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::AveragePool(op_params, convertShapeToTFLiteShape(_inputShape),
+ _inputData, convertShapeToTFLiteShape(_outputShape),
+ _outputData);
return true;
}
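
Both pooling paths now use TFLite's parameter-struct API: the geometry goes into a single tflite::PoolParams and only the activation bounds differ between the float and quant8 variants (the MaxPoolLayer diff below follows the same pattern). A standalone sketch of the float call, with placeholder geometry:

    // input_shape/output_shape are tflite::RuntimeShape; data pointers are float*.
    tflite::PoolParams params;
    params.stride_height = 1;
    params.stride_width = 1;
    params.filter_height = 2;
    params.filter_width = 2;
    params.padding_values.height = 0;
    params.padding_values.width = 0;
    params.float_activation_min = std::numeric_limits<float>::lowest(); // no fused activation
    params.float_activation_max = std::numeric_limits<float>::max();
    ::tflite::optimized_ops::AveragePool(params, input_shape, input_data,
                                         output_shape, output_data);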
diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h
index 9f390a9e1..280f7ae5f 100644
--- a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h
+++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h
@@ -19,7 +19,7 @@
#include <NeuralNetworks.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "exec/interface/IFunction.h"
#include "kernel/cpu/OperationUtils.h"
@@ -30,7 +30,7 @@ namespace kernel
namespace cpu
{
-class AvgPoolLayer : public ::arm_compute::IFunction
+class AvgPoolLayer : public ::neurun::exec::IFunction
{
public:
AvgPoolLayer();
diff --git a/runtimes/neurun/src/kernel/cpu/CMakeLists.txt b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt
index dddf154c3..436cb898c 100644
--- a/runtimes/neurun/src/kernel/cpu/CMakeLists.txt
+++ b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt
@@ -6,8 +6,8 @@ target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NNFW_INCLUDE_DIR})
target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NEURUN_INCLUDE_DIR})
target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow)
-target_link_libraries(${LIB_NEURUN_KERNEL_CPU} arm_compute) # TODO We should not need this
target_link_libraries(${LIB_NEURUN_KERNEL_CPU} tensorflow-lite)
+target_link_libraries(${LIB_NEURUN_KERNEL_CPU} nnfw_lib_misc)
set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES OUTPUT_NAME kernel_cpu)
diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
index 5fe5e3993..be093b437 100644
--- a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
@@ -24,6 +24,7 @@ namespace neurun
{
namespace kernel
{
+
namespace cpu
{
@@ -36,13 +37,21 @@ ConcatLayer::ConcatLayer()
bool ConcatLayer::concatenationFloat32()
{
- int num_inputs = _inputShapes.size();
- std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
- std::vector<::tflite::Dims<4>> inputDims(num_inputs);
- for (int i = 0; i < num_inputs; i++)
+ uint32_t num_inputs = _inputShapes.size();
+
+ tflite::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+
+ std::vector<::tflite::RuntimeShape *> inputDimsPtr;
+ std::vector<::tflite::RuntimeShape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+
+ for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims[i] = convertShapeToDims(_inputShapes[i]);
- inputDimsPtr[i] = &inputDims[i];
+ inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
}
std::vector<const float *> inputFloatPtrs;
@@ -52,24 +61,44 @@ bool ConcatLayer::concatenationFloat32()
inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
}
- ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>(
- getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(),
- num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::Concatenation<float>(
+ op_params, inputDimsPtr.data(), inputFloatPtrs.data(),
+ convertShapeToTFLiteShape(_outputShape), reinterpret_cast<float *>(_outputData));
return true;
}
bool ConcatLayer::concatenationQuant8()
{
int num_inputs = _inputShapes.size();
- std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
- std::vector<::tflite::Dims<4>> inputDims(num_inputs);
- for (int i = 0; i < num_inputs; i++)
+
+ std::vector<int32_t> input_zeropoints(num_inputs);
+ std::vector<float> input_scales(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims[i] = convertShapeToDims(_inputShapes[i]);
- inputDimsPtr[i] = &inputDims[i];
+ input_zeropoints[i] = _inputShapes[i].offset;
+ input_scales[i] = _inputShapes[i].scale;
}
- ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>(
- getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(),
- num_inputs, _outputData, convertShapeToDims(_outputShape));
+
+ tflite::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+ op_params.input_zeropoint = input_zeropoints.data();
+ op_params.input_scale = input_scales.data();
+ op_params.output_zeropoint = _outputShape.offset;
+ op_params.output_scale = _outputShape.scale;
+
+ std::vector<::tflite::RuntimeShape *> inputDimsPtr;
+ std::vector<::tflite::RuntimeShape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
+ }
+
+ ::tflite::optimized_ops::Concatenation<uint8_t>(
+ op_params, inputDimsPtr.data(), _inputDataPtrs.data(),
+ convertShapeToTFLiteShape(_outputShape), _outputData);
return true;
}
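
The quant8 path needs the per-input zero points and scales because each input may be quantized differently; the kernel requantizes every element into the output's domain. Conceptually, per element:

    q_out = output_zeropoint + round((input_scale / output_scale) * (q_in - input_zeropoint))

so inputs whose scale and zero point already match the output reduce to a plain copy.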
diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.h b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h
index 9aacab5e8..64f813508 100644
--- a/runtimes/neurun/src/kernel/cpu/ConcatLayer.h
+++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h
@@ -20,7 +20,7 @@
#include <NeuralNetworks.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "exec/interface/IFunction.h"
#include "kernel/cpu/OperationUtils.h"
@@ -31,7 +31,7 @@ namespace kernel
namespace cpu
{
-class ConcatLayer : public ::arm_compute::IFunction
+class ConcatLayer : public ::neurun::exec::IFunction
{
public:
ConcatLayer();
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
index 81e88e0f0..c694fa75f 100644
--- a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
@@ -33,55 +33,51 @@ static constexpr int kStaticBufferSize = 1605632;
static char static_scratch_buffer[kStaticBufferSize];
static std::mutex executionMutex;
-#define ANDROID_NN_CONV_PARAMETERS(Type) \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
- uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft; \
- \
- ::tflite::Dims<4> im2colDim; \
- im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
- im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
- im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
- im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
- \
- im2colDim.strides[0] = 1; \
- for (int i = 1; i < 4; i++) \
- { \
- im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
- } \
- Type *im2colData = nullptr; \
- uint64_t im2colByteSize = sizeof(Type); \
- std::unique_ptr<Type[]> im2colGuard; \
- for (int i = 0; i < 4; i++) \
- { \
- im2colByteSize *= im2colDim.sizes[i]; \
- } \
- /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
- if (im2colByteSize >= 0x7fffffff) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- if (im2colByteSize <= kStaticBufferSize) \
- { \
- im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
- } \
- else \
- { \
- im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
- if (im2colData == nullptr) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- im2colGuard.reset(im2colData); \
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
+ uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft; \
+ \
+ Shape im2colShape; \
+ im2colShape.dimensions.resize(4); \
+ im2colShape.dimensions[0] = getSizeOfDimension(_outputShape, 0); \
+ im2colShape.dimensions[1] = getSizeOfDimension(_outputShape, 1); \
+ im2colShape.dimensions[2] = getSizeOfDimension(_outputShape, 2); \
+ im2colShape.dimensions[3] = inDepth * kernelHeight * kernelWidth; \
+ \
+ Type *im2colData = nullptr; \
+ uint64_t im2colByteSize = sizeof(Type); \
+ std::unique_ptr<Type[]> im2colGuard; \
+ for (int i = 0; i < 4; i++) \
+ { \
+ im2colByteSize *= im2colShape.dimensions[i]; \
+ } \
+ /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
+ if (im2colByteSize >= 0x7fffffff) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ if (im2colByteSize <= kStaticBufferSize) \
+ { \
+ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+ } \
+ else \
+ { \
+ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+ if (im2colData == nullptr) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ im2colGuard.reset(im2colData); \
}
ConvolutionLayer::ConvolutionLayer()
@@ -112,19 +108,32 @@ bool ConvolutionLayer::convFloat32()
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+
+ ::tflite::ConvParams op_params;
+ op_params.padding_type = ::tflite::PaddingType::kSame;
+ op_params.padding_values.width = paddingWidth;
+ op_params.padding_values.height = paddingHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = dilationWidthFactor;
+ op_params.dilation_height_factor = dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
::tflite::optimized_ops::Conv(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth,
- _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape), im2colDataToPass, im2colDim);
+ op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData), convertShapeToTFLiteShape(_kernelShape),
+ reinterpret_cast<const float *>(_kernelData), convertShapeToTFLiteShape(_biasShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData), convertShapeToTFLiteShape(im2colShape),
+ im2colDataToPass);
return true;
}
bool ConvolutionLayer::convQuant8()
{
ANDROID_NN_CONV_PARAMETERS(uint8_t)
+
int32_t inputOffset = -_inputShape.offset;
int32_t kernelOffset = -_kernelShape.offset;
int32_t outputOffset = _outputShape.offset;
@@ -141,6 +150,24 @@ bool ConvolutionLayer::convQuant8()
}
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+
+ ::tflite::ConvParams op_params;
+ op_params.padding_type = ::tflite::PaddingType::kSame;
+ op_params.padding_values.width = paddingWidth;
+ op_params.padding_values.height = paddingHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = dilationWidthFactor;
+ op_params.dilation_height_factor = dilationHeightFactor;
+ op_params.input_offset = inputOffset;
+ op_params.weights_offset = kernelOffset;
+ op_params.output_offset = outputOffset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
static gemmlowp::GemmContext gemm_context;
// Prevent concurrent executions that may access the scratch buffer and
// gemm_context.
@@ -148,11 +175,10 @@ bool ConvolutionLayer::convQuant8()
// Allow gemmlowp to automatically decide how many threads to use.
gemm_context.set_max_num_threads(0);
::tflite::optimized_ops::Conv(
- _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData,
- convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData),
- convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight,
- outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context);
+ op_params, convertShapeToTFLiteShape(_inputShape), _inputData,
+ convertShapeToTFLiteShape(_kernelShape), _kernelData, convertShapeToTFLiteShape(_biasShape),
+ reinterpret_cast<const int32_t *>(_biasData), convertShapeToTFLiteShape(_outputShape),
+ _outputData, convertShapeToTFLiteShape(im2colShape), im2colData, &gemm_context);
return true;
}
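
The rewritten ANDROID_NN_CONV_PARAMETERS macro keeps the original im2col strategy and only swaps tflite::Dims<4> for a neurun Shape: the scratch buffer holds N x outH x outW x (inDepth * kH * kW) elements, reuses the 1,605,632-byte static buffer when it fits, heap-allocates otherwise, and bails out near 2^31 bytes because tflite::optimized_ops::Conv indexes with int. A worked sizing example (illustrative numbers): a float conv with output 1x112x112 and a 3x3 kernel over 3 input channels needs 4 * 1 * 112 * 112 * 27 = 1,354,752 bytes, which fits in the static buffer; a 1x160x160 output needs 2,764,800 bytes and takes the heap path.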
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h
index b7afbcec6..9b7f55ff1 100644
--- a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h
@@ -19,7 +19,7 @@
#include <NeuralNetworks.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "exec/interface/IFunction.h"
#include "kernel/cpu/OperationUtils.h"
@@ -30,7 +30,7 @@ namespace kernel
namespace cpu
{
-class ConvolutionLayer : public ::arm_compute::IFunction
+class ConvolutionLayer : public ::neurun::exec::IFunction
{
public:
ConvolutionLayer();
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
index 41b9afc0c..abe82db5e 100644
--- a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
@@ -44,64 +44,39 @@ FullyConnectedLayer::FullyConnectedLayer()
static std::mutex executionMutex;
bool FullyConnectedLayer::fullyConnectedFloat32()
{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- // b/80425683, optimized implementation produces incorrect results when the
- // number of input elements is the square of batch_size.
- uint32_t batch_size = getSizeOfDimension(_outputShape, 0);
- uint32_t input_n_elements = getNumberOfElements(_inputShape);
- if (batch_size * batch_size == input_n_elements)
+ int total_input_size = 1;
+ for (int i = 0; i < _inputShape.dimensions.size(); i++)
{
- ::tflite::reference_ops::FullyConnected(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
- }
- else
- {
- ::tflite::optimized_ops::FullyConnected(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ total_input_size *= _inputShape.dimensions[i];
}
+
+ int input_size = _weightsShape.dimensions[1];
+ const int batch_size = total_input_size / input_size;
+ const int num_units = _weightsShape.dimensions[0];
+
+ TfLiteFusedActivation act = convertFusedActivation(_activation);
+
+ ::tflite::tensor_utils::VectorBatchVectorAssign(reinterpret_cast<const float *>(_biasData),
+ num_units, batch_size,
+ reinterpret_cast<float *>(_outputData));
+
+ // Compute output += weight * input
+ ::tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+ reinterpret_cast<const float *>(_weightsData), num_units, input_size,
+ reinterpret_cast<const float *>(_inputData), batch_size,
+ reinterpret_cast<float *>(_outputData), /*result_stride=*/1);
+
+ // Apply activation function
+ ::tflite::tensor_utils::ApplyActivationToVector(reinterpret_cast<float *>(_outputData),
+ batch_size * num_units, act,
+ reinterpret_cast<float *>(_outputData));
+
return true;
}
bool FullyConnectedLayer::fullyConnectedQuant8()
{
- int32_t inputOffset = -_inputShape.offset;
- int32_t weightsOffset = -_weightsShape.offset;
- int32_t outputOffset = _outputShape.offset;
- float real_multiplier = 0.0;
- int32_t output_multiplier = 0;
- int32_t output_shift = 0;
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- // Caution: 'Convolution' may be misleading here. It is just a math term.
- if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape,
- &real_multiplier) ||
- !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
- {
- return false;
- }
- CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
- &output_activation_max);
- static gemmlowp::GemmContext gemm_context;
- // Prevent concurrent executions that access gemm_context.
- std::unique_lock<std::mutex> lock(executionMutex);
- // Allow gemmlowp to automatically decide how many threads to use.
- gemm_context.set_max_num_threads(0);
- ::tflite::optimized_ops::FullyConnected(
- _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData,
- convertShapeToDims(_weightsShape), weightsOffset,
- reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset,
- output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData,
- convertShapeToDims(_outputShape), &gemm_context);
- return true;
+ throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"};
}
void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape,
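
The new float path composes three tensor_utils primitives instead of a monolithic FullyConnected call, computing output[b][u] = act(bias[u] + sum_i weights[u][i] * input[b][i]) with weights of shape [num_units, input_size] and batch_size = total_input_size / input_size. The same three-step recipe on raw arrays (shapes and the fused activation are illustrative):

    // out[b][u] = act(bias[u] + sum_i W[u][i] * in[b][i]); W is num_units x input_size.
    ::tflite::tensor_utils::VectorBatchVectorAssign(bias, num_units, batch_size, out);
    ::tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
        weights, num_units, input_size, in, batch_size, out, /*result_stride=*/1);
    ::tflite::tensor_utils::ApplyActivationToVector(out, batch_size * num_units,
                                                    kTfLiteActRelu, out);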
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h
index b1ba172b0..20a388349 100644
--- a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h
@@ -19,7 +19,7 @@
#include <NeuralNetworks.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "exec/interface/IFunction.h"
#include "kernel/cpu/OperationUtils.h"
@@ -30,7 +30,7 @@ namespace kernel
namespace cpu
{
-class FullyConnectedLayer : public ::arm_compute::IFunction
+class FullyConnectedLayer : public ::neurun::exec::IFunction
{
public:
FullyConnectedLayer();
diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
index 3d96bb401..c4a288b07 100644
--- a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
@@ -26,14 +26,14 @@ namespace kernel
namespace cpu
{
-#define MAXPOOLING_PARAMETERS \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft;
+#define MAXPOOLING_PARAMETERS \
+ tflite::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
MaxPoolLayer::MaxPoolLayer()
: _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
@@ -46,31 +46,30 @@ MaxPoolLayer::MaxPoolLayer()
bool MaxPoolLayer::maxPoolFloat32()
{
-
MAXPOOLING_PARAMETERS
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
- ::tflite::optimized_ops::MaxPool(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::MaxPool(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
return true;
}
bool MaxPoolLayer::maxPoolQuant8()
{
-
MAXPOOLING_PARAMETERS
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
- ::tflite::optimized_ops::MaxPool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
- _kernelHeight, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::MaxPool(op_params, convertShapeToTFLiteShape(_inputShape), _inputData,
+ convertShapeToTFLiteShape(_outputShape), _outputData);
return true;
}
diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h
index b42efb9f6..2b185550b 100644
--- a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h
+++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h
@@ -19,7 +19,7 @@
#include <NeuralNetworks.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "exec/interface/IFunction.h"
#include "kernel/cpu/OperationUtils.h"
@@ -30,7 +30,7 @@ namespace kernel
namespace cpu
{
-class MaxPoolLayer : public ::arm_compute::IFunction
+class MaxPoolLayer : public ::neurun::exec::IFunction
{
public:
MaxPoolLayer();
diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.cc b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc
index 5ec2f8e62..b28508c27 100644
--- a/runtimes/neurun/src/kernel/cpu/OperationUtils.cc
+++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc
@@ -184,7 +184,7 @@ int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
return static_cast<int32_t>(std::floor(max_input_rescaled));
}
-Shape getShape(const ::neurun::graph::operand::Object &o)
+Shape getShape(const ::neurun::model::operand::Object &o)
{
Shape shape;
diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.h b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
index 5914d04e3..3610990a5 100644
--- a/runtimes/neurun/src/kernel/cpu/OperationUtils.h
+++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
@@ -23,11 +23,13 @@
#include <limits>
#include <vector>
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
#include "tensorflow/contrib/lite/kernels/internal/types.h"
-#include "graph/operand/Object.h"
-#include "graph/operand/DataType.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "model/operand/Object.h"
+#include "model/operand/DataType.h"
-using OperandType = neurun::graph::operand::DataType;
+using OperandType = neurun::model::operand::DataType;
namespace neurun
{
@@ -75,6 +77,51 @@ inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape)
return dims;
}
+inline ::tflite::RuntimeShape convertShapeToTFLiteShape(const Shape &shape)
+{
+ std::vector<int32_t> raw_shape;
+ raw_shape.resize(4);
+
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ if (i >= shape.dimensions.size())
+ {
+ raw_shape[i] = 1;
+ }
+ else
+ {
+ raw_shape[i] = shape.dimensions[i];
+ }
+ }
+
+ return ::tflite::GetTensorShape(raw_shape);
+}
+
+inline TfLiteFusedActivation convertFusedActivation(FuseCode act)
+{
+ if (act == ANEURALNETWORKS_FUSED_NONE)
+ {
+ return kTfLiteActNone;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU)
+ {
+ return kTfLiteActRelu;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU1)
+ {
+ return kTfLiteActRelu1;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU6)
+ {
+ return kTfLiteActRelu6;
+ }
+
+ return kTfLiteActNone;
+}
+
__wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
int32_t *right_shift);
@@ -92,7 +139,7 @@ void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape,
int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
-Shape getShape(const ::neurun::graph::operand::Object &o);
+Shape getShape(const ::neurun::model::operand::Object &o);
uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions);
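
Note that convertShapeToTFLiteShape pads the missing trailing dimensions with 1 (not the leading ones), so a lower-rank operand occupies the first axes of the 4-D RuntimeShape. A quick illustration (hypothetical values):

    Shape s;
    s.dimensions = {3, 4};
    auto rt = convertShapeToTFLiteShape(s); // RuntimeShape {3, 4, 1, 1}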
diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc
new file mode 100644
index 000000000..ba8c5ab92
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+#include "util/feature/nhwc/Reader.h"
+#include "util/feature/nhwc/View.h"
+#include "util/feature/nchw/View.h"
+#include "util/feature/Coordinate4D.h"
+
+#include <misc/feature/IndexIterator.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+using Type = model::operation::PermuteNode::Type;
+
+void PermuteLayer::configure(std::shared_ptr<::neurun::backend::operand::IObject> input,
+ std::shared_ptr<::neurun::backend::operand::IObject> output,
+ const model::operand::Shape &shape, Type type)
+{
+ _input = input;
+ _output = output;
+ _shape = shape;
+ _type = type;
+}
+
+void PermuteLayer::run()
+{
+ auto rank = _shape.rank();
+
+ switch (_type)
+ {
+ case Type::NHWC_TO_NCHW:
+ {
+ auto fn = [&](::neurun::backend::operand::ITensor &tensor) {
+ auto input_tensor = _input->ptr();
+
+ auto input_buffer = input_tensor->buffer();
+ auto input_size = input_tensor->total_size();
+
+ auto output_buffer = tensor.buffer();
+ auto output_size = tensor.total_size();
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ memcpy(output_buffer, input_buffer, input_size);
+ break;
+ }
+ case 2:
+ {
+ auto matrix_shape = _shape.asMatrix();
+
+ for (auto h = 0; h < matrix_shape.H; ++h)
+ {
+ neurun::util::feature::Coordinate4D coord{0, h, 0, 0};
+ memcpy(output_buffer + tensor.calcOffset(coord), input_buffer + h * matrix_shape.W,
+ matrix_shape.W * sizeof(float));
+ }
+ break;
+ }
+ case 3:
+ {
+ const int32_t depth = _shape.dim(0);
+ const int32_t height = _shape.dim(1);
+ const int32_t width = _shape.dim(2);
+
+ for (auto c = 0; c < depth; ++c)
+ {
+ for (auto h = 0; h < height; ++h)
+ {
+ neurun::util::feature::Coordinate4D coord{0, h, 0, c};
+ memcpy(output_buffer + tensor.calcOffset(coord),
+ input_buffer + c * height * width + h * width, width * sizeof(float));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ auto feature = _shape.asFeature();
+
+ const util::feature::nhwc::Reader<float> from{
+ feature, reinterpret_cast<const float *>(input_buffer), input_size};
+ util::feature::nchw::View<float> into{&tensor};
+
+ ::nnfw::misc::feature::iterate(feature)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
+ }
+ default:
+ throw "NYI";
+ break;
+ }
+ };
+ _output->access(fn);
+ break;
+ }
+ case Type::NCHW_TO_NHWC:
+ {
+ auto fn = [&](::neurun::backend::operand::ITensor &tensor) {
+ auto input_buffer = tensor.buffer();
+ auto input_size = tensor.total_size();
+
+ auto output_tensor = _output->ptr();
+
+ auto output_buffer = output_tensor->buffer();
+ auto output_size = output_tensor->total_size();
+
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ memcpy(output_buffer, input_buffer, output_size);
+ break;
+ }
+ case 2:
+ {
+ auto matrix_shape = _shape.asMatrix();
+
+ for (auto h = 0; h < matrix_shape.H; ++h)
+ {
+ neurun::util::feature::Coordinate4D coord{0, h, 0, 0};
+ memcpy(output_buffer + h * matrix_shape.W, input_buffer + tensor.calcOffset(coord),
+ matrix_shape.W * sizeof(float));
+ }
+ break;
+ }
+ case 3:
+ {
+ const int32_t depth = _shape.dim(0);
+ const int32_t height = _shape.dim(1);
+ const int32_t width = _shape.dim(2);
+
+ for (auto c = 0; c < depth; ++c)
+ {
+ for (auto h = 0; h < height; ++h)
+ {
+ neurun::util::feature::Coordinate4D coord{0, h, 0, c};
+ memcpy(output_buffer + c * height * width + h * width,
+ input_buffer + tensor.calcOffset(coord), width * sizeof(float));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ auto feature = _shape.asFeature();
+
+ const util::feature::nchw::View<float> from{&tensor};
+ util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
+ output_size};
+
+ ::nnfw::misc::feature::iterate(feature)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
+ }
+ default:
+ throw "NYI";
+ break;
+ }
+ };
+ _input->access(fn);
+ break;
+ }
+ case Type::COPY:
+ // If two different backends using same tensor layout, we need this.
+ throw "NYI";
+ break;
+ }
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
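
For the rank-4 branches, the two directions are row-major layout conversions between NHWC and NCHW indexing; for an element at (n, c, h, w):

    NHWC offset = ((n * H + h) * W + w) * C + c
    NCHW offset = ((n * C + c) * H + h) * W + w

The rank-2 and rank-3 branches do the same conversion one contiguous row at a time with memcpy, using calcOffset to locate each row inside the backend tensor's (possibly padded) layout.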
diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.h b/runtimes/neurun/src/kernel/cpu/PermuteLayer.h
new file mode 100644
index 000000000..d9e1709bc
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_PERMUTE_LAYER_H__
+#define __NEURUN_KERNEL_CPU_PERMUTE_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include "exec/interface/IFunction.h"
+
+#include "util/feature/nhwc/View.h"
+#include "OperationUtils.h"
+#include "backend/interface/operand/IObject.h"
+#include "model/operation/PermuteNode.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class PermuteLayer : public ::neurun::exec::IFunction
+{
+public:
+ PermuteLayer() = default;
+
+public:
+ void configure(std::shared_ptr<::neurun::backend::operand::IObject> input,
+ std::shared_ptr<::neurun::backend::operand::IObject> output,
+ const model::operand::Shape &shape, model::operation::PermuteNode::Type type);
+ void run();
+
+private:
+ std::shared_ptr<::neurun::backend::operand::IObject> _input;
+ std::shared_ptr<::neurun::backend::operand::IObject> _output;
+ model::operand::Shape _shape;
+ model::operation::PermuteNode::Type _type;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_PERMUTE_LAYER_H__
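
A minimal usage sketch (the IObject handles and the operand shape come from the backend's operand registry in real code; the names here are placeholders):

    neurun::kernel::cpu::PermuteLayer layer;
    layer.configure(input_obj, output_obj, shape,
                    neurun::model::operation::PermuteNode::Type::NHWC_TO_NCHW);
    layer.run(); // copies input into output, converting the layout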
diff --git a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h
index 395cc1d7f..51d0bacee 100644
--- a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h
+++ b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h
@@ -19,7 +19,7 @@
#include <NeuralNetworks.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "exec/interface/IFunction.h"
#include "kernel/cpu/OperationUtils.h"
@@ -30,7 +30,7 @@ namespace kernel
namespace cpu
{
-class ReshapeLayer : public ::arm_compute::IFunction
+class ReshapeLayer : public ::neurun::exec::IFunction
{
public:
ReshapeLayer();
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
index 4f5a69f2e..c998c65f6 100644
--- a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
@@ -33,45 +33,86 @@ SoftMaxLayer::SoftMaxLayer()
// DO NOTHING
}
+// Performs softmax along the input of size (input_size * batch_size).
+void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+ float *out)
+{
+ TF_LITE_ASSERT(input_size > 0);
+
+ // For each batch
+ for (int b = 0; b < batch_size; b++)
+ {
+ // Find the max coeff.
+ float max_coeff = in[0];
+ for (int i = 1; i < input_size; i++)
+ {
+ if (in[i] > max_coeff)
+ max_coeff = in[i];
+ }
+
+ // Compute the normalized sum of exps.
+ float exp_sum = 0.0;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] = std::exp((in[i] - max_coeff) * beta);
+ exp_sum += out[i];
+ }
+
+ // Divide by the sum of exps.
+ float reciprocal_sum_exp = 1.f / exp_sum;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] *= reciprocal_sum_exp;
+ }
+
+ // Advance in and out pointers for the next batch.
+ in += input_size;
+ out += input_size;
+ }
+}
+
bool SoftMaxLayer::softmaxFloat32()
{
- ::tflite::Dims<4> dim;
+ Shape shapeIn4D;
+
if (getNumberOfDimensions(_inputShape) == 2)
{
uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
- Shape shapeIn4D;
- shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
- dim = convertShapeToDims(shapeIn4D);
+ Softmax(reinterpret_cast<const float *>(_inputData), input_size, batch_size, _beta,
+ reinterpret_cast<float *>(_outputData));
}
else if (getNumberOfDimensions(_inputShape) == 4)
{
- dim = convertShapeToDims(_inputShape);
+ ::tflite::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
}
else
{
std::cout << "only 2D and 4D tensors supported" << std::endl;
return false;
}
- ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta,
- reinterpret_cast<float *>(_outputData), dim);
+
return true;
}
bool SoftMaxLayer::softmaxQuant8()
{
- ::tflite::Dims<4> dim;
+ Shape shapeIn4D = _inputShape;
+
if (getNumberOfDimensions(_inputShape) == 2)
{
uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
- Shape shapeIn4D;
shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
- dim = convertShapeToDims(shapeIn4D);
}
else if (getNumberOfDimensions(_inputShape) == 4)
{
- dim = convertShapeToDims(_inputShape);
+ // shapeIn4D was initialized from _inputShape above; rank-4 inputs need no reshaping.
}
else
{
@@ -94,8 +135,13 @@ bool SoftMaxLayer::softmaxQuant8()
return false;
}
float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
- ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min,
- _outputData, dim);
+
+ ::tflite::SoftmaxParams op_params;
+ op_params.input_multiplier = input_multiplier;
+ op_params.input_left_shift = input_left_shift;
+ op_params.diff_min = diff_min;
+ ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(shapeIn4D), _inputData,
+ convertShapeToTFLiteShape(shapeIn4D), _outputData);
return true;
}
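The 2D float path now uses the hand-written Softmax helper above, which subtracts the row maximum before exponentiating so that std::exp cannot overflow, then normalizes by the sum of the shifted exponentials. A self-contained usage sketch of the same algorithm (local replica with hypothetical inputs, independent of the kernel classes):

#include <cmath>
#include <cstdio>

// Local replica of the stable softmax used by softmaxFloat32 above.
static void softmaxRow(const float *in, int n, float beta, float *out)
{
  float max_coeff = in[0];
  for (int i = 1; i < n; ++i)
    if (in[i] > max_coeff)
      max_coeff = in[i];

  float exp_sum = 0.0f;
  for (int i = 0; i < n; ++i)
  {
    out[i] = std::exp((in[i] - max_coeff) * beta); // shifted exp: no overflow
    exp_sum += out[i];
  }
  for (int i = 0; i < n; ++i)
    out[i] /= exp_sum;
}

int main()
{
  const float logits[4] = {1.0f, 2.0f, 3.0f, 1000.0f}; // huge logit stays safe
  float probs[4];
  softmaxRow(logits, 4, /*beta=*/1.0f, probs);
  float sum = 0.0f;
  for (float p : probs)
    sum += p;
  std::printf("probs sum to %f (expect 1.0)\n", sum);
  return 0;
}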
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h
index 8057be52f..df1aa4044 100644
--- a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h
@@ -19,7 +19,7 @@
#include <NeuralNetworks.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "exec/interface/IFunction.h"
#include "kernel/cpu/OperationUtils.h"
@@ -30,7 +30,7 @@ namespace kernel
namespace cpu
{
-class SoftMaxLayer : public ::arm_compute::IFunction
+class SoftMaxLayer : public ::neurun::exec::IFunction
{
public:
SoftMaxLayer();
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc
deleted file mode 100644
index 00e914732..000000000
--- a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
-//
-
-#if 0
-
-#include "TensorConvertFromCommonLayer.h"
-
-#include "internal/nnapi/feature/Reader.h"
-#include "internal/nnapi/feature/View.h"
-
-#include <util/feature/IndexIterator.h>
-
-namespace neurun
-{
-namespace kernel
-{
-namespace cpu
-{
-
-bool TensorConvertFromCommonLayer::convert()
-{
- auto inputBuffer = _inputTensor->buffer();
- auto inputSize = _inputTensor->info()->total_size();
-
- auto outputBuffer = _outputTensor->buffer();
- auto outputSize = _outputTensor->info()->total_size();
-
- if (_tensorShape.rank() == 2)
- {
- const auto len = _tensorShape.dim(1);
-
- auto base = reinterpret_cast<const float *>(inputBuffer);
-
- for (int32_t n = 0; n < len; ++n)
- {
- auto from = base + n;
- auto into =
- reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
-
- *into = *from;
- }
- }
- else if (_tensorShape.rank() == 4)
- {
- auto featureShape = _tensorShape.asFeature();
-
- const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
- ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
-
- ::nnfw::util::feature::iterate(featureShape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, ch, row, col) = value;
- };
- }
-}
-
-void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
- ::internal::cpu::Tensor *outputTensor,
- const Shape &tensorShape)
-{
- _inputTensor = inputTensor;
- _outputTensor = outputTensor;
- _tensorShape = tensorShape;
-}
-
-void TensorConvertFromCommonLayer::run() { convert(); }
-
-} // namespace cpu
-} // namespace kernel
-} // namespace neurun
-
-#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h
deleted file mode 100644
index 56f7bcf32..000000000
--- a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
-//
-
-#if 0
-
-#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
-#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
-
-#include <NeuralNetworks.h>
-
-#include <arm_compute/runtime/IFunction.h>
-
-#include "internal/Model.h"
-#include "internal/common/Tensor.h"
-#include "internal/cpu.h"
-
-namespace neurun
-{
-namespace kernel
-{
-namespace cpu
-{
-
-class TensorConvertFromCommonLayer : public ::arm_compute::IFunction
-{
-public:
- TensorConvertFromCommonLayer() {}
-
-public:
- bool convert();
-
- void configure(::internal::common::Tensor *inputTensor, ::internal::cpu::Tensor *outputTensor,
- const Shape &tensorShape);
-
- void run();
-
-private:
- ::internal::common::Tensor *_inputTensor;
- ::internal::cpu::Tensor *_outputTensor;
-
- Shape _tensorShape{1};
-};
-
-} // namespace cpu
-} // namespace kernel
-} // namespace neurun
-
-#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
-
-#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc
deleted file mode 100644
index 7d721f494..000000000
--- a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
-//
-
-#if 0
-
-#include "TensorConvertToCommonLayer.h"
-
-#include "internal/nnapi/feature/Reader.h"
-#include "internal/nnapi/feature/View.h"
-
-#include <util/feature/IndexIterator.h>
-
-namespace neurun
-{
-namespace kernel
-{
-namespace cpu
-{
-
-bool TensorConvertToCommonLayer::convert()
-{
- auto inputBuffer = _inputTensor->buffer();
- auto inputSize = _inputTensor->info()->total_size();
-
- auto outputBuffer = _outputTensor->buffer();
- auto outputSize = _outputTensor->info()->total_size();
-
- if (_tensorShape.rank() == 2)
- {
- const auto len = _tensorShape.dim(1);
-
- auto base = reinterpret_cast<float *>(outputBuffer);
-
- for (int32_t n = 0; n < len; ++n)
- {
- auto from = reinterpret_cast<const float *>(
- _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
- auto into = base + n;
-
- *into = *from;
- }
- }
- else if (_tensorShape.rank() == 4)
- {
- auto featureShape = _tensorShape.asFeature();
-
- const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
- ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
-
- ::nnfw::util::feature::iterate(featureShape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, ch, row, col) = value;
- };
- }
-}
-
-void TensorConvertToCommonLayer::configure(::internal::cpu::Tensor *inputTensor,
- ::internal::common::Tensor *outputTensor,
- const Shape &tensorShape)
-{
- _inputTensor = inputTensor;
- _outputTensor = outputTensor;
- _tensorShape = tensorShape;
-}
-
-void TensorConvertToCommonLayer::run() { convert(); }
-
-} // namespace cpu
-} // namespace kernel
-} // namespace neurun
-
-#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h
deleted file mode 100644
index 7e96d1aff..000000000
--- a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
-//
-
-#if 0
-
-#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
-#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
-
-#include <NeuralNetworks.h>
-
-#include <arm_compute/runtime/IFunction.h>
-
-#include "internal/Model.h"
-#include "internal/common/Tensor.h"
-#include "internal/cpu.h"
-
-namespace neurun
-{
-namespace kernel
-{
-namespace cpu
-{
-
-class TensorConvertToCommonLayer : public ::arm_compute::IFunction
-{
-public:
- TensorConvertToCommonLayer() {}
-
-public:
- bool convert();
-
- void configure(::internal::cpu::Tensor *inputTensor, ::internal::common::Tensor *outputTensor,
- const Shape &tensorShape);
-
- void run();
-
-private:
- ::internal::cpu::Tensor *_inputTensor;
- ::internal::common::Tensor *_outputTensor;
-
- Shape _tensorShape{1};
-};
-
-} // namespace cpu
-} // namespace kernel
-} // namespace neurun
-
-#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
-
-#endif
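Both converter layers are removed outright; their job, a coordinate-wise copy of a rank-4 feature map between the common buffer and a backend tensor, is presumably taken over by the new PermuteLayer above. The core of what the deleted convert() routines did, with the Reader/View pair reduced to hypothetical indexing callbacks that hide each buffer's native layout:

#include <cstddef>
#include <cstdint>
#include <functional>

struct FeatureShape
{
  uint32_t N, C, H, W; // batch, channel, row, column
};

// Sketch of the deleted rank-4 copy: Reader/View translated every
// (batch, ch, row, col) coordinate into its buffer's own layout; the
// index functions below stand in for that translation.
using IndexFn = std::function<size_t(uint32_t, uint32_t, uint32_t, uint32_t)>;

void copyFeature(const FeatureShape &s, const float *from, float *into,
                 const IndexFn &fromIdx, const IndexFn &intoIdx)
{
  for (uint32_t n = 0; n < s.N; ++n)
    for (uint32_t c = 0; c < s.C; ++c)
      for (uint32_t h = 0; h < s.H; ++h)
        for (uint32_t w = 0; w < s.W; ++w)
          into[intoIdx(n, c, h, w)] = from[fromIdx(n, c, h, w)];
}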