path: root/runtimes/neurun/src/kernel
Diffstat (limited to 'runtimes/neurun/src/kernel')
-rw-r--r--  runtimes/neurun/src/kernel/CMakeLists.txt                            2
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt                    15
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc                   158
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h                     67
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc   94
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h    67
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc     94
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h      67
-rw-r--r--  runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc                     118
-rw-r--r--  runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h                       78
-rw-r--r--  runtimes/neurun/src/kernel/cpu/CMakeLists.txt                       14
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConcatLayer.cc                      109
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConcatLayer.h                        66
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc                 202
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h                   79
-rw-r--r--  runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc              139
-rw-r--r--  runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h                69
-rw-r--r--  runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc                     118
-rw-r--r--  runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h                       78
-rw-r--r--  runtimes/neurun/src/kernel/cpu/OperationUtils.cc                   230
-rw-r--r--  runtimes/neurun/src/kernel/cpu/OperationUtils.h                    103
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc                      57
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ReshapeLayer.h                       58
-rw-r--r--  runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc                     128
-rw-r--r--  runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h                       64
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc      90
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h       67
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc        90
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h         67
29 files changed, 2588 insertions(+), 0 deletions(-)
diff --git a/runtimes/neurun/src/kernel/CMakeLists.txt b/runtimes/neurun/src/kernel/CMakeLists.txt
new file mode 100644
index 000000000..a39823102
--- /dev/null
+++ b/runtimes/neurun/src/kernel/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(cpu)
+add_subdirectory(acl_cl)
diff --git a/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt
new file mode 100644
index 000000000..857fe6fe6
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt
@@ -0,0 +1,15 @@
+file(GLOB SOURCES "*.cc")
+
+add_library(${LIB_NEURUN_KERNEL_ACL_CL} STATIC ${SOURCES})
+
+target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NNFW_INCLUDE_DIR})
+target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NEURUN_INCLUDE_DIR})
+target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) # TODO We should not need this
+
+target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} arm_compute)
+target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} tensorflow-lite) # TODO We should not need this
+target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} ${LIB_NEURUN_KERNEL_CPU}) # TODO We should not need this
+
+set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES OUTPUT_NAME kernel_acl_cl)
+install(TARGETS ${LIB_NEURUN_KERNEL_ACL_CL} DESTINATION lib/neurun)
diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc
new file mode 100644
index 000000000..b75ac90f0
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include "backend/acl_cl/kernel/View.h"
+#include "logging.h"
+
+namespace
+{
+
+bool matchSizeExceptAxis(const ::arm_compute::ICLTensor *t1, const ::arm_compute::ICLTensor *t2,
+ uint32_t axis)
+{
+ assert(t1->info()->num_dimensions() <= 4);
+ assert(t2->info()->num_dimensions() <= 4);
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (axis == i)
+ continue;
+ if (t1->info()->dimension(i) != t2->info()->dimension(i))
+ return false;
+ }
+ return true;
+}
+
+} // namespace {anonymous}
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+ConcatLayer::ConcatLayer()
+ : _input_allocs(), _output_alloc(nullptr), _axis(0), _input_type(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool ConcatLayer::concatenationFloat32()
+{
+ // Input and output size check
+ {
+ // NOTE Support only tensor with dimension 4 or less
+
+ uint32_t axis_sum = 0;
+
+ for (auto input : _input_allocs)
+ {
+ assert(matchSizeExceptAxis(_output_alloc, input, _axis));
+ axis_sum += input->info()->dimension(_axis);
+ }
+
+ assert(_output_alloc->info()->dimension(_axis) == axis_sum);
+ }
+
+ VERBOSE(Concat_RUN) << "START Concat" << std::endl;
+
+ // Perform operation
+ {
+ uint32_t axis_offset = 0;
+
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+ _output_alloc->map(queue);
+ ::internal::arm_compute::kernel::View<float> output_view{_output_alloc};
+
+ for (auto input : _input_allocs)
+ {
+ input->map(queue);
+ const ::internal::arm_compute::kernel::View<float> input_reader{input};
+
+ for (uint32_t n = 0; n < input_reader.shape().N; n++)
+ {
+ for (uint32_t c = 0; c < input_reader.shape().C; c++)
+ {
+ for (uint32_t h = 0; h < input_reader.shape().H; h++)
+ {
+ for (uint32_t w = 0; w < input_reader.shape().W; w++)
+ {
+ uint32_t no = (_axis == 3) ? axis_offset : 0;
+ uint32_t co = (_axis == 2) ? axis_offset : 0;
+ uint32_t ho = (_axis == 1) ? axis_offset : 0;
+ uint32_t wo = (_axis == 0) ? axis_offset : 0;
+ output_view.at(n + no, c + co, h + ho, w + wo) = input_reader.at(n, c, h, w);
+ }
+ }
+ }
+ }
+ if (_axis == 3)
+ axis_offset += input_reader.shape().N;
+ if (_axis == 2)
+ axis_offset += input_reader.shape().C;
+ if (_axis == 1)
+ axis_offset += input_reader.shape().H;
+ if (_axis == 0)
+ axis_offset += input_reader.shape().W;
+
+ input->unmap(queue);
+ }
+ _output_alloc->unmap(queue);
+ }
+
+ VERBOSE(Concat_RUN) << "End Concat" << std::endl;
+
+ return true;
+}
+
+void ConcatLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs,
+ int32_t axis, ::arm_compute::ICLTensor *output_alloc)
+{
+ _input_allocs = input_allocs;
+ _output_alloc = output_alloc;
+
+ assert(axis < 4);
+
+  // This map converts an NNAPI axis given in NHWC order to the reversed NCHW
+  // order used by the loops below: NHWC -> WHCN
+ static const uint32_t axis_map[] = {3, 1, 0, 2};
+ _axis = axis_map[axis];
+
+ // TODO Support Quant8
+ _input_type = OperandType::TENSOR_FLOAT32;
+}
+
+void ConcatLayer::run()
+{
+ if (_input_type == OperandType::TENSOR_FLOAT32)
+ {
+ concatenationFloat32();
+ }
+ else if (_input_type == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error("NYI - concatenationQuant8()");
+ }
+}
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
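
Note: the `axis_map` in `configure()` above translates an NNAPI concat axis given in NHWC order into the reversed-NCHW (WHCN) indexing that the nested loops in `concatenationFloat32()` use. A minimal standalone sketch of that translation follows; the helper name is hypothetical and not part of this commit:

    #include <cassert>
    #include <cstdint>

    // Maps an NNAPI axis (NHWC order: 0=N, 1=H, 2=W, 3=C) to the
    // reversed-NCHW index used by the kernel (0=W, 1=H, 2=C, 3=N).
    uint32_t toReversedNCHWAxis(int32_t nhwc_axis)
    {
      assert(nhwc_axis >= 0 && nhwc_axis < 4);
      static const uint32_t axis_map[] = {3, 1, 0, 2};
      return axis_map[nhwc_axis];
    }

    // Example: the NNAPI channel axis (3) maps to 2, which run() then
    // treats as the channel offset (co).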
diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h
new file mode 100644
index 000000000..4767721fa
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
+#define __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/CL/ICLTensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include "graph/operand/DataType.h"
+
+using OperandType = neurun::graph::operand::DataType;
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+//
+// neurun::kernel::acl_cl::ConcatLayer
+// A naive implementation of ConcatLayer for ACL
+//
+
+class ConcatLayer : public ::arm_compute::IFunction
+{
+public:
+ ConcatLayer();
+
+public:
+ void configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs,
+ int32_t axis /* NNAPI tensor axis from NHWC order */,
+ ::arm_compute::ICLTensor *output_alloc);
+
+ void run();
+
+private:
+ bool concatenationFloat32();
+
+private:
+ std::vector<::arm_compute::ICLTensor *> _input_allocs;
+ ::arm_compute::ICLTensor *_output_alloc;
+ int32_t _axis;
+ OperandType _input_type;
+};
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif // __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc
new file mode 100644
index 000000000..fa1d77579
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#include "TensorConvertFromCommonLayer.h"
+
+#include "internal/nnapi/feature/Reader.h"
+#include "backend/acl_cl/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+void TensorConvertFromCommonLayer::convert()
+{
+ auto inputBuffer = _inputTensor->buffer();
+ auto inputSize = _inputTensor->info()->total_size();
+
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+ _outputTensor->map(queue);
+
+ if (_tensorShape.rank() == 2)
+ {
+ const auto len = _tensorShape.dim(1);
+
+ auto base = reinterpret_cast<const float *>(inputBuffer);
+
+ for (int32_t n = 0; n < len; ++n)
+ {
+ auto from = base + n;
+ auto into =
+ reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+
+ *into = *from;
+ }
+ }
+ else if (_tensorShape.rank() == 4)
+ {
+ auto featureShape = _tensorShape.asFeature();
+
+ const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+ ::internal::arm_compute::feature::View<float> into{_outputTensor};
+
+ ::nnfw::util::feature::iterate(featureShape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ }
+
+ _outputTensor->unmap(queue);
+}
+
+void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
+ ::arm_compute::ICLTensor *outputTensor,
+ const ::neurun::graph::operand::Shape &tensorShape)
+{
+ _inputTensor = inputTensor;
+ _outputTensor = outputTensor;
+ _tensorShape = tensorShape;
+}
+
+void TensorConvertFromCommonLayer::run() { convert(); }
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h
new file mode 100644
index 000000000..bd031a106
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/core/CL/ICLTensor.h>
+
+#include "internal/Model.h"
+#include "internal/common/Tensor.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+class TensorConvertFromCommonLayer : public ::arm_compute::IFunction
+{
+public:
+ TensorConvertFromCommonLayer() {}
+
+public:
+  void convert();
+
+ void configure(::internal::common::Tensor *inputTensor, ::arm_compute::ICLTensor *outputTensor,
+ const ::neurun::graph::operand::Shape &tensorShape);
+
+ void run();
+
+private:
+ ::internal::common::Tensor *_inputTensor;
+ ::arm_compute::ICLTensor *_outputTensor;
+
+ ::neurun::graph::operand::Shape _tensorShape{1};
+};
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+
+#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc
new file mode 100644
index 000000000..985524bc3
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#include "TensorConvertToCommonLayer.h"
+
+#include "backend/acl_cl/feature/View.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+void TensorConvertToCommonLayer::convert()
+{
+ auto outputBuffer = _outputTensor->buffer();
+ auto outputSize = _outputTensor->info()->total_size();
+
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+ _inputTensor->map(queue);
+
+ if (_tensorShape.rank() == 2)
+ {
+ const auto len = _tensorShape.dim(1);
+
+ auto base = reinterpret_cast<float *>(outputBuffer);
+
+ for (int32_t n = 0; n < len; ++n)
+ {
+ auto from = reinterpret_cast<const float *>(
+ _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+ auto into = base + n;
+
+ *into = *from;
+ }
+ }
+ else if (_tensorShape.rank() == 4)
+ {
+ auto featureShape = _tensorShape.asFeature();
+
+ const ::internal::arm_compute::feature::View<float> from{_inputTensor};
+ ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+ ::nnfw::util::feature::iterate(featureShape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ }
+
+ _inputTensor->unmap(queue);
+}
+
+void TensorConvertToCommonLayer::configure(::arm_compute::ICLTensor *inputTensor,
+ ::internal::common::Tensor *outputTensor,
+ const ::neurun::graph::operand::Shape &tensorShape)
+{
+ _inputTensor = inputTensor;
+ _outputTensor = outputTensor;
+ _tensorShape = tensorShape;
+}
+
+void TensorConvertToCommonLayer::run() { convert(); }
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h
new file mode 100644
index 000000000..576f1ee71
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/core/CL/ICLTensor.h>
+
+#include "internal/Model.h"
+#include "internal/common/Tensor.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+class TensorConvertToCommonLayer : public ::arm_compute::IFunction
+{
+public:
+ TensorConvertToCommonLayer() {}
+
+public:
+  void convert();
+
+ void configure(::arm_compute::ICLTensor *inputTensor, ::internal::common::Tensor *outputTensor,
+ const ::neurun::graph::operand::Shape &tensorShape);
+
+ void run();
+
+private:
+ ::arm_compute::ICLTensor *_inputTensor;
+ ::internal::common::Tensor *_outputTensor;
+
+ ::neurun::graph::operand::Shape _tensorShape{1};
+};
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+
+#endif
diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
new file mode 100644
index 000000000..2a6a84e10
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPoolLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+#define AVGPOOLING_PARAMETERS \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft;
+
+AvgPoolLayer::AvgPoolLayer()
+ : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
+ _paddingTop(0), _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
+ _kernelWidth(0), _kernelHeight(0), _activation(ANEURALNETWORKS_FUSED_NONE),
+ _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool AvgPoolLayer::averagePoolFloat32()
+{
+
+ AVGPOOLING_PARAMETERS
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ ::tflite::optimized_ops::AveragePool(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
+ _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ return true;
+}
+bool AvgPoolLayer::averagePoolQuant8()
+{
+
+ AVGPOOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+
+ ::tflite::optimized_ops::AveragePool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
+ _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
+ _kernelHeight, output_activation_min, output_activation_max,
+ _outputData, convertShapeToDims(_outputShape));
+ return true;
+}
+
+void AvgPoolLayer::configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const FuseCode activation,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _kernelWidth = kernelWidth;
+ _kernelHeight = kernelHeight;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void AvgPoolLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ averagePoolFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"AvgPoolLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // averagePoolQuant8();
+ }
+}
+
+#undef AVGPOOLING_PARAMETERS
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
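
Note: `configure()` takes explicit per-side paddings rather than a padding scheme, so the input, kernel, stride, and padding values must already satisfy the usual pooled-extent relation in each spatial dimension. A small sketch of that relation, with a hypothetical helper name:

    #include <cstdint>

    // Output extent for one spatial dimension given explicit padding;
    // the shapes passed to configure() are expected to satisfy this.
    uint32_t pooledExtent(uint32_t in, uint32_t kernel, uint32_t stride,
                          uint32_t pad_head, uint32_t pad_tail)
    {
      return (in + pad_head + pad_tail - kernel) / stride + 1;
    }

    // e.g. pooledExtent(224, 2, 2, 0, 0) == 112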
diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h
new file mode 100644
index 000000000..9f390a9e1
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__
+#define __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class AvgPoolLayer : public ::arm_compute::IFunction
+{
+public:
+ AvgPoolLayer();
+
+public:
+ bool averagePoolFloat32();
+
+ bool averagePoolQuant8();
+
+ void configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const FuseCode activation, uint8_t *outputData,
+ const Shape outputShape);
+
+ void run();
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ Shape _inputShape;
+ Shape _outputShape;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _kernelWidth;
+ uint32_t _kernelHeight;
+
+ FuseCode _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/CMakeLists.txt b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt
new file mode 100644
index 000000000..dddf154c3
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt
@@ -0,0 +1,14 @@
+file(GLOB SOURCES "*.cc")
+
+add_library(${LIB_NEURUN_KERNEL_CPU} STATIC ${SOURCES})
+
+target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NNFW_INCLUDE_DIR})
+target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NEURUN_INCLUDE_DIR})
+target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow)
+
+target_link_libraries(${LIB_NEURUN_KERNEL_CPU} arm_compute) # TODO We should not need this
+target_link_libraries(${LIB_NEURUN_KERNEL_CPU} tensorflow-lite)
+
+set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES OUTPUT_NAME kernel_cpu)
+install(TARGETS ${LIB_NEURUN_KERNEL_CPU} DESTINATION lib/neurun)
diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
new file mode 100644
index 000000000..5fe5e3993
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+ConcatLayer::ConcatLayer()
+ : _inputDataPtrs(), _outputData(nullptr), _axis(0), _inputShapes(), _outputShape(),
+ _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool ConcatLayer::concatenationFloat32()
+{
+ int num_inputs = _inputShapes.size();
+ std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
+ std::vector<::tflite::Dims<4>> inputDims(num_inputs);
+ for (int i = 0; i < num_inputs; i++)
+ {
+ inputDims[i] = convertShapeToDims(_inputShapes[i]);
+ inputDimsPtr[i] = &inputDims[i];
+ }
+
+ std::vector<const float *> inputFloatPtrs;
+
+ for (auto ptr : _inputDataPtrs)
+ {
+ inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
+ }
+
+ ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>(
+ getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(),
+ num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape));
+ return true;
+}
+bool ConcatLayer::concatenationQuant8()
+{
+ int num_inputs = _inputShapes.size();
+ std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
+ std::vector<::tflite::Dims<4>> inputDims(num_inputs);
+ for (int i = 0; i < num_inputs; i++)
+ {
+ inputDims[i] = convertShapeToDims(_inputShapes[i]);
+ inputDimsPtr[i] = &inputDims[i];
+ }
+ ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>(
+ getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(),
+ num_inputs, _outputData, convertShapeToDims(_outputShape));
+ return true;
+}
+
+void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs,
+ const std::vector<Shape> &inputShapes, int32_t axis,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputDataPtrs = inputDataPtrs;
+
+ for (auto shape : inputShapes)
+ {
+ _inputShapes.emplace_back(shape);
+ _inputType = shape.type;
+ }
+
+ _axis = axis;
+
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void ConcatLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ concatenationFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"ConcatLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // concatenationQuant8();
+ }
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
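
Note: `tflite::Dims<4>` stores dimensions innermost-first, so an NNAPI axis counted from the outermost dimension has to be flipped before calling `Concatenation`; that is what the `getNumberOfDimensions(_outputShape) - _axis - 1` expression above computes. A one-line sketch with a hypothetical helper name:

    // tflite counts the concat axis from the innermost dimension:
    // e.g. rank 4, NNAPI channel axis 3 -> 4 - 3 - 1 = 0 (innermost).
    inline int toTfliteConcatAxis(int rank, int nnapi_axis) { return rank - nnapi_axis - 1; }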
diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.h b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h
new file mode 100644
index 000000000..9aacab5e8
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_CONCATLAYER_H__
+#define __NEURUN_KERNEL_CPU_CONCATLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class ConcatLayer : public ::arm_compute::IFunction
+{
+public:
+ ConcatLayer();
+
+public:
+ bool concatenationFloat32();
+
+ bool concatenationQuant8();
+
+ void configure(const std::vector<const uint8_t *> &inputDataPtrs,
+ const std::vector<Shape> &inputShapes, int32_t axis, uint8_t *outputData,
+ const Shape outputShape);
+
+ void run();
+
+private:
+ std::vector<const uint8_t *> _inputDataPtrs;
+ uint8_t *_outputData;
+
+ int32_t _axis;
+
+ std::vector<Shape> _inputShapes;
+ Shape _outputShape;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_CONCATLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
new file mode 100644
index 000000000..81e88e0f0
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+#include <mutex>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+// If possible we will use this static buffer for the im2col scratch tensor.
+static constexpr int kStaticBufferSize = 1605632;
+static char static_scratch_buffer[kStaticBufferSize];
+static std::mutex executionMutex;
+
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
+ uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft; \
+ \
+ ::tflite::Dims<4> im2colDim; \
+ im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
+ im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
+ im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
+ im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
+ \
+ im2colDim.strides[0] = 1; \
+ for (int i = 1; i < 4; i++) \
+ { \
+ im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
+ } \
+ Type *im2colData = nullptr; \
+ uint64_t im2colByteSize = sizeof(Type); \
+ std::unique_ptr<Type[]> im2colGuard; \
+ for (int i = 0; i < 4; i++) \
+ { \
+ im2colByteSize *= im2colDim.sizes[i]; \
+ } \
+ /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
+ if (im2colByteSize >= 0x7fffffff) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ if (im2colByteSize <= kStaticBufferSize) \
+ { \
+ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+ } \
+ else \
+ { \
+ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+ if (im2colData == nullptr) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ im2colGuard.reset(im2colData); \
+ }
+
+ConvolutionLayer::ConvolutionLayer()
+ : _inputData(nullptr), _kernelData(nullptr), _outputData(nullptr), _biasData(nullptr),
+ _inputShape(), _kernelShape(), _outputShape(), _biasShape(), _paddingLeft(0), _paddingTop(0),
+ _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
+ _activation(ANEURALNETWORKS_FUSED_NONE), _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool ConvolutionLayer::convFloat32()
+{
+ ANDROID_NN_CONV_PARAMETERS(float)
+
+ const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape);
+ const int kernel_width = ArraySize(kernel_dim, 1);
+ const int kernel_height = ArraySize(kernel_dim, 2);
+ const bool need_im2col =
+ _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1;
+
+ float *im2colDataToPass = nullptr;
+ if (need_im2col)
+ {
+ im2colDataToPass = im2colData;
+ }
+
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+ ::tflite::optimized_ops::Conv(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
+ reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth,
+ _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight,
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape), im2colDataToPass, im2colDim);
+ return true;
+}
+
+bool ConvolutionLayer::convQuant8()
+{
+ ANDROID_NN_CONV_PARAMETERS(uint8_t)
+ int32_t inputOffset = -_inputShape.offset;
+ int32_t kernelOffset = -_kernelShape.offset;
+ int32_t outputOffset = _outputShape.offset;
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape,
+ &real_multiplier) ||
+ !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
+ {
+ return false;
+ }
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+ static gemmlowp::GemmContext gemm_context;
+ // Prevent concurrent executions that may access the scratch buffer and
+ // gemm_context.
+ std::unique_lock<std::mutex> lock(executionMutex);
+  // Allow gemmlowp to decide automatically how many threads to use.
+ gemm_context.set_max_num_threads(0);
+ ::tflite::optimized_ops::Conv(
+ _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData,
+ convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData),
+ convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight,
+ outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max,
+ _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context);
+ return true;
+}
+
+void ConvolutionLayer::configure(uint8_t *inputData, const Shape inputShape, uint8_t *kernelData,
+ const Shape kernelShape, uint8_t *biasData, const Shape biasShape,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const FuseCode activation, uint8_t *outputData,
+ const Shape outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _kernelData = kernelData;
+ _kernelShape = kernelShape;
+ _biasData = biasData;
+ _biasShape = biasShape;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void ConvolutionLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ convFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"ConvolutionLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // convQuant8();
+ }
+}
+
+#undef ANDROID_NN_CONV_PARAMETERS
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
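
Note: the `ANDROID_NN_CONV_PARAMETERS` macro sizes an im2col scratch buffer holding one patch of `inDepth * kernelHeight * kernelWidth` elements per output pixel, and only falls back to heap allocation when that exceeds the 1,605,632-byte static buffer. A sketch of the size computation it performs, assumed equivalent, with a hypothetical helper name:

    #include <cstdint>

    // Byte size of the im2col scratch buffer: one row of
    // (in_depth * k_h * k_w) patch elements per output pixel.
    uint64_t im2colBytes(uint64_t elem_size, uint64_t batch, uint64_t out_h,
                         uint64_t out_w, uint64_t in_depth, uint64_t k_h,
                         uint64_t k_w)
    {
      return elem_size * batch * out_h * out_w * in_depth * k_h * k_w;
    }

    // e.g. float, batch 1, 112x112 output, 3x3 kernel over 64 input channels:
    // 4 * 1 * 112 * 112 * 64 * 3 * 3 = 28,901,376 bytes, which exceeds the
    // static buffer, so the heap path is taken.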
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h
new file mode 100644
index 000000000..b7afbcec6
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__
+#define __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class ConvolutionLayer : public ::arm_compute::IFunction
+{
+public:
+ ConvolutionLayer();
+
+public:
+ bool convFloat32();
+
+ bool convQuant8();
+
+ void configure(uint8_t *inputData, const Shape inputShape, uint8_t *kernelData,
+ const Shape kernelShape, uint8_t *biasData, const Shape biasShape,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
+ const FuseCode activation, uint8_t *outputData, const Shape outputShape);
+
+ void run();
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_kernelData;
+ uint8_t *_outputData;
+ uint8_t *_biasData;
+
+ Shape _inputShape;
+ Shape _kernelShape;
+ Shape _outputShape;
+ Shape _biasShape;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+
+ FuseCode _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
new file mode 100644
index 000000000..41b9afc0c
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+#include <mutex>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _inputData(nullptr), _weightsData(nullptr), _biasData(nullptr), _outputData(nullptr),
+ _inputShape(), _weightsShape(), _biasShape(), _outputShape(),
+ _activation(ANEURALNETWORKS_FUSED_NONE), _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+// executionMutex is used to protect concurrent access to non-threadsafe resources
+// like gemmlowp::GemmContext.
+// std::mutex is safe for pthreads on Android.
+static std::mutex executionMutex;
+bool FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+  // b/80425683: the optimized implementation produces incorrect results when the
+  // number of input elements is the square of batch_size.
+ uint32_t batch_size = getSizeOfDimension(_outputShape, 0);
+ uint32_t input_n_elements = getNumberOfElements(_inputShape);
+ if (batch_size * batch_size == input_n_elements)
+ {
+ ::tflite::reference_ops::FullyConnected(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
+ reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ }
+ else
+ {
+ ::tflite::optimized_ops::FullyConnected(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
+ reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ }
+ return true;
+}
+
+bool FullyConnectedLayer::fullyConnectedQuant8()
+{
+ int32_t inputOffset = -_inputShape.offset;
+ int32_t weightsOffset = -_weightsShape.offset;
+ int32_t outputOffset = _outputShape.offset;
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+  // Caution: the name 'Convolution' can be misleading here; it is just a math term.
+ if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape,
+ &real_multiplier) ||
+ !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
+ {
+ return false;
+ }
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+ static gemmlowp::GemmContext gemm_context;
+ // Prevent concurrent executions that access gemm_context.
+ std::unique_lock<std::mutex> lock(executionMutex);
+  // Allow gemmlowp to decide automatically how many threads to use.
+ gemm_context.set_max_num_threads(0);
+ ::tflite::optimized_ops::FullyConnected(
+ _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData,
+ convertShapeToDims(_weightsShape), weightsOffset,
+ reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset,
+ output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData,
+ convertShapeToDims(_outputShape), &gemm_context);
+ return true;
+}
+
+void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape,
+ uint8_t *weightsData, const Shape weightsShape,
+ uint8_t *biasData, const Shape biasShape, FuseCode activation,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _weightsData = weightsData;
+ _weightsShape = weightsShape;
+ _biasData = biasData;
+ _biasShape = biasShape;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ fullyConnectedFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // fullyConnectedQuant8();
+ }
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
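
Note: the b/80425683 workaround above routes execution to the reference kernel whenever the input element count equals batch_size squared, since the optimized kernel is known to produce incorrect results in exactly that case. A sketch of the dispatch condition, with a hypothetical helper name:

    #include <cstdint>

    // True when the slower-but-correct reference kernel must be used
    // (b/80425683): e.g. input shape [4, 4] -> batch 4, 16 elements.
    bool useReferenceFullyConnected(uint32_t batch_size, uint32_t input_n_elements)
    {
      return batch_size * batch_size == input_n_elements;
    }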
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h
new file mode 100644
index 000000000..b1ba172b0
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__
+#define __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class FullyConnectedLayer : public ::arm_compute::IFunction
+{
+public:
+ FullyConnectedLayer();
+
+public:
+ bool fullyConnectedFloat32();
+
+ bool fullyConnectedQuant8();
+
+ void configure(uint8_t *inputData, const Shape inputShape, uint8_t *weightsData,
+ const Shape weightsShape, uint8_t *biasData, const Shape biasShape,
+ FuseCode activation, uint8_t *outputData, const Shape outputShape);
+
+ void run();
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_weightsData;
+ uint8_t *_biasData;
+ uint8_t *_outputData;
+
+ Shape _inputShape;
+ Shape _weightsShape;
+ Shape _biasShape;
+ Shape _outputShape;
+
+ FuseCode _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
new file mode 100644
index 000000000..3d96bb401
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPoolLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+#define MAXPOOLING_PARAMETERS \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft;
+
+MaxPoolLayer::MaxPoolLayer()
+ : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
+ _paddingTop(0), _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
+ _kernelWidth(0), _kernelHeight(0), _activation(ANEURALNETWORKS_FUSED_NONE),
+ _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool MaxPoolLayer::maxPoolFloat32()
+{
+
+ MAXPOOLING_PARAMETERS
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ ::tflite::optimized_ops::MaxPool(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
+ _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ return true;
+}
+bool MaxPoolLayer::maxPoolQuant8()
+{
+
+ MAXPOOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+
+ ::tflite::optimized_ops::MaxPool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
+ _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
+ _kernelHeight, output_activation_min, output_activation_max,
+ _outputData, convertShapeToDims(_outputShape));
+ return true;
+}
+
+void MaxPoolLayer::configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const FuseCode activation,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputData = inputData;
+
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _kernelWidth = kernelWidth;
+ _kernelHeight = kernelHeight;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void MaxPoolLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ maxPoolFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"MaxPoolLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // maxPoolQuant8();
+ }
+}
+
+#undef MAXPOOLING_PARAMETERS
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h
new file mode 100644
index 000000000..b42efb9f6
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__
+#define __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class MaxPoolLayer : public ::arm_compute::IFunction
+{
+public:
+ MaxPoolLayer();
+
+public:
+ bool maxPoolFloat32();
+
+ bool maxPoolQuant8();
+
+ void configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const FuseCode activation, uint8_t *outputData,
+ const Shape outputShape);
+
+ void run();
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ Shape _inputShape;
+ Shape _outputShape;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _kernelWidth;
+ uint32_t _kernelHeight;
+
+ FuseCode _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.cc b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc
new file mode 100644
index 000000000..5ec2f8e62
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/cpu/OperationUtils.h"
+
+#include <cmath>
+#include <algorithm>
+#include <cassert>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+uint32_t getNumberOfDimensions(const Shape &shape) { return shape.dimensions.size(); }
+
+uint32_t getNumberOfElements(const Shape &shape)
+{
+ uint32_t count = 1;
+ for (size_t i = 0; i < shape.dimensions.size(); i++)
+ {
+ count *= shape.dimensions[i];
+ }
+ return count;
+}
+
+uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx)
+{
+ if (dimensionIdx >= shape.dimensions.size())
+ {
+    // TODO: log the error
+ return 0;
+ }
+ return shape.dimensions[dimensionIdx];
+}
+
+bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int32_t *right_shift)
+{
+ assert(double_multiplier >= 0.);
+ assert(double_multiplier < 1.);
+ if (double_multiplier == 0.)
+ {
+ *quantized_multiplier = 0;
+ *right_shift = 0;
+ return true;
+ }
+ assert(double_multiplier > 0.);
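+  // Decompose the multiplier as q * 2^(-right_shift) with q in [0.5, 1), then
+  // round q to a Q31 fixed-point integer, so the multiplier is approximated
+  // as quantized_multiplier * 2^(-31 - right_shift).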
+ const double q = std::frexp(double_multiplier, right_shift);
+ *right_shift *= -1;
+ int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ --*right_shift;
+ }
+ assert(*right_shift >= 0);
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+ return true;
+}
+
+bool GetQuantizedConvolutionMultipler(const Shape &inputShape, const Shape &filterShape,
+ const Shape &biasShape, const Shape &outputShape,
+ float *multiplier)
+{
+ const float input_product_scale = inputShape.scale * filterShape.scale;
+ const float bias_scale = biasShape.scale;
+ const float output_scale = outputShape.scale;
+ // The following conditions must be guaranteed by the training pipeline.
+ assert(std::abs(input_product_scale - bias_scale) <=
+ 1e-6 * std::min(input_product_scale, bias_scale));
+ assert(input_product_scale >= 0);
+ assert(input_product_scale < output_scale);
+ *multiplier = input_product_scale / output_scale;
+ return true;
+}
+
+bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier > 1.);
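+  // Same Q31 decomposition as the smaller-than-one case, except the positive
+  // exponent from std::frexp becomes a left shift applied after the multiply.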
+ const double q = std::frexp(double_multiplier, left_shift);
+ int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ ++*left_shift;
+ }
+ assert(*left_shift >= 0);
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+ return true;
+}
+
+void CalculateActivationRangeFloat(int32_t activation, float *activation_min, float *activation_max)
+{
+ if (activation == ANEURALNETWORKS_FUSED_RELU)
+ {
+ *activation_min = 0.f;
+ *activation_max = std::numeric_limits<float>::max();
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_RELU6)
+ {
+ *activation_min = 0.f;
+ *activation_max = 6.f;
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_RELU1)
+ {
+ *activation_min = -1.f;
+ *activation_max = 1.f;
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_NONE)
+ {
+ *activation_min = std::numeric_limits<float>::lowest();
+ *activation_max = std::numeric_limits<float>::max();
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape, int32_t *act_min,
+ int32_t *act_max)
+{
+ const int32_t qmin = std::numeric_limits<uint8_t>::min();
+ const int32_t qmax = std::numeric_limits<uint8_t>::max();
+ const auto scale = outputShape.scale;
+ const auto zero_point = outputShape.offset;
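+  // Map a real-valued activation bound into the quantized domain of the output.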
+ auto quantize = [scale, zero_point](float f) {
+ return zero_point + static_cast<int32_t>(std::round(f / scale));
+ };
+ if (activation == ANEURALNETWORKS_FUSED_RELU)
+ {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = qmax;
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_RELU6)
+ {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = std::min(qmax, quantize(6.0));
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_RELU1)
+ {
+ *act_min = std::max(qmin, quantize(-1.0));
+ *act_max = std::min(qmax, quantize(1.0));
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_NONE)
+ {
+ *act_min = qmin;
+ *act_max = qmax;
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
+{
+ const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
+ (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
+ // Tighten bound using floor. Suppose that we could use the exact value.
+ // After scaling the difference, the result would be at the maximum. Thus we
+ // must ensure that our value has lower magnitude.
+ return static_cast<int32_t>(std::floor(max_input_rescaled));
+}
+
+Shape getShape(const ::neurun::graph::operand::Object &o)
+{
+ Shape shape;
+
+ shape.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type()));
+ shape.dimensions = std::vector<uint32_t>(o.shape().dims().begin(), o.shape().dims().end());
+ shape.scale = o.typeInfo().scale();
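+  // NOTE shape.offset (the quantization zero point) is not propagated yet.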
+ // shape.offset = _offset;
+
+ return shape;
+}
+
+size_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions)
+{
+ size_t size = 4;
+
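+  // Element size in bytes: 32-bit scalar/tensor types use 4, asymmetric
+  // quantized 8-bit tensors use 1.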
+ switch (type)
+ {
+ case OperandType::SCALAR_FLOAT32:
+ case OperandType::SCALAR_INT32:
+ case OperandType::SCALAR_UINT32:
+ case OperandType::TENSOR_FLOAT32:
+ case OperandType::TENSOR_INT32:
+ size = 4;
+ break;
+ case OperandType::TENSOR_QUANT8_ASYMM:
+ size = 1;
+ break;
+ default:
+ throw std::runtime_error("Not supported operand type.");
+ break;
+ }
+
+ for (auto d : dimensions)
+ {
+ size *= d;
+ }
+
+ return size;
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.h b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
new file mode 100644
index 000000000..5914d04e3
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_OPERATION_UTILS_H__
+#define __NEURUN_KERNEL_CPU_OPERATION_UTILS_H__
+
+#include <NeuralNetworks.h>
+
+#include <iostream>
+#include <limits>
+#include <vector>
+
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+#include "graph/operand/Object.h"
+#include "graph/operand/DataType.h"
+
+using OperandType = neurun::graph::operand::DataType;
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+struct Shape
+{
+ OperandType type;
+ std::vector<uint32_t> dimensions;
+ float scale;
+ int32_t offset;
+};
+
+uint32_t getNumberOfDimensions(const Shape &shape);
+
+uint32_t getNumberOfElements(const Shape &shape);
+
+uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx);
+
+inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape)
+{
+ // nnAssert(shape.dimensions.size() <= 4);
+ ::tflite::Dims<4> dims;
+ // The dimensions are reversed in Dims<4>.
+ for (int i = 0; i < 4; ++i)
+ {
+ int src = static_cast<int>(shape.dimensions.size()) - i - 1;
+ if (src >= 0)
+ {
+ dims.sizes[i] = static_cast<int>(getSizeOfDimension(shape, src));
+ }
+ else
+ {
+ dims.sizes[i] = 1;
+ }
+ }
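+  // Dense strides: each stride is the product of all lower-dimension sizes.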
+ dims.strides[0] = 1;
+ for (int i = 1; i < 4; i++)
+ {
+ dims.strides[i] = dims.strides[i - 1] * dims.sizes[i - 1];
+ }
+ return dims;
+}
+
+__wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int32_t *right_shift);
+
+__wur bool GetQuantizedConvolutionMultipler(const Shape &inputShape, const Shape &filterShape,
+ const Shape &biasShape, const Shape &outputShape,
+ float *multiplier);
+__wur bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+void CalculateActivationRangeFloat(int32_t activation, float *activation_min,
+ float *activation_max);
+
+void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape, int32_t *act_min,
+ int32_t *act_max);
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
+
+Shape getShape(const ::neurun::graph::operand::Object &o);
+
+size_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions);
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_OPERATION_UTILS_H__
diff --git a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc
new file mode 100644
index 000000000..377f783e0
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReshapeLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+ReshapeLayer::ReshapeLayer()
+ : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape()
+{
+ // DO NOTHING
+}
+
+bool ReshapeLayer::reshapeGeneric()
+{
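+  // Reshape never moves elements; input and output cover the same number of
+  // bytes, so a raw copy is sufficient.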
+ size_t count = sizeOfData(_inputShape.type, _inputShape.dimensions);
+ memcpy(reinterpret_cast<void *>(_outputData), reinterpret_cast<const void *>(_inputData), count);
+ return true;
+}
+
+void ReshapeLayer::configure(uint8_t *inputData, const Shape &inputShape, uint8_t *outputData,
+ const Shape &outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void ReshapeLayer::run() { reshapeGeneric(); }
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
diff --git a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h
new file mode 100644
index 000000000..395cc1d7f
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_RESHAPELAYER_H__
+#define __NEURUN_KERNEL_CPU_RESHAPELAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class ReshapeLayer : public ::arm_compute::IFunction
+{
+public:
+ ReshapeLayer();
+
+public:
+ bool reshapeGeneric();
+
+ void configure(uint8_t *inputData, const Shape &inputShape, uint8_t *outputData,
+ const Shape &outputShape);
+
+  void run() override;
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ Shape _inputShape;
+ Shape _outputShape;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_RESHAPELAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
new file mode 100644
index 000000000..4f5a69f2e
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SoftMaxLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+SoftMaxLayer::SoftMaxLayer()
+ : _inputData(nullptr), _outputData(nullptr), _beta(0.0), _inputShape(), _outputShape(),
+ _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool SoftMaxLayer::softmaxFloat32()
+{
+ ::tflite::Dims<4> dim;
+ if (getNumberOfDimensions(_inputShape) == 2)
+ {
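+    // Promote the 2D (batch, input) tensor to the 4D shape the tflite kernel expects.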
+ uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
+ uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
+ Shape shapeIn4D;
+ shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
+ dim = convertShapeToDims(shapeIn4D);
+ }
+ else if (getNumberOfDimensions(_inputShape) == 4)
+ {
+ dim = convertShapeToDims(_inputShape);
+ }
+ else
+ {
+ std::cout << "only 2D and 4D tensors supported" << std::endl;
+ return false;
+ }
+ ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta,
+ reinterpret_cast<float *>(_outputData), dim);
+ return true;
+}
+
+bool SoftMaxLayer::softmaxQuant8()
+{
+ ::tflite::Dims<4> dim;
+ if (getNumberOfDimensions(_inputShape) == 2)
+ {
+ uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
+ uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
+ Shape shapeIn4D;
+ shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
+ dim = convertShapeToDims(shapeIn4D);
+ }
+ else if (getNumberOfDimensions(_inputShape) == 4)
+ {
+ dim = convertShapeToDims(_inputShape);
+ }
+ else
+ {
+ std::cout << "only 2D and 4D tensors supported" << std::endl;
+ return false;
+ }
+ if (_outputShape.offset != 0 || _outputShape.scale != 1.f / 256)
+ {
+ std::cout << "incorrect scale / offset for output" << std::endl;
+ return false;
+ }
+ static const int32_t kScaledDiffIntegerBits = 5;
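+  // Fold beta and the input scale into one fixed-point multiplier, clamped so
+  // it still fits in a signed 32-bit value.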
+ const double input_beta_real_multiplier = std::min(
+ 1.0 * _beta * _inputShape.scale * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
+ int32_t input_multiplier = 0;
+ int32_t input_left_shift = 0;
+ if (!QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
+ &input_left_shift))
+ {
+ return false;
+ }
+ float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
+ ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min,
+ _outputData, dim);
+ return true;
+}
+
+void SoftMaxLayer::configure(uint8_t *inputData, const Shape &inputShape, const float beta,
+ uint8_t *outputData, const Shape &outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _outputData = outputData;
+ _outputShape = outputShape;
+ _beta = beta;
+}
+
+void SoftMaxLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ softmaxFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"SoftMaxLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // softmaxQuant8();
+ }
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h
new file mode 100644
index 000000000..8057be52f
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__
+#define __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class SoftMaxLayer : public ::arm_compute::IFunction
+{
+public:
+ SoftMaxLayer();
+
+public:
+ bool softmaxFloat32();
+
+ bool softmaxQuant8();
+
+ void configure(uint8_t *inputData, const Shape &inputShape, const float beta, uint8_t *outputData,
+ const Shape &outputShape);
+
+  void run() override;
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ float _beta;
+
+ Shape _inputShape;
+ Shape _outputShape;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc
new file mode 100644
index 000000000..00e914732
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#include "TensorConvertFromCommonLayer.h"
+
+#include "internal/nnapi/feature/Reader.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+bool TensorConvertFromCommonLayer::convert()
+{
+ auto inputBuffer = _inputTensor->buffer();
+ auto inputSize = _inputTensor->info()->total_size();
+
+ auto outputBuffer = _outputTensor->buffer();
+ auto outputSize = _outputTensor->info()->total_size();
+
+ if (_tensorShape.rank() == 2)
+ {
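+    // Rank-2: copy element by element along the second dimension.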
+ const auto len = _tensorShape.dim(1);
+
+ auto base = reinterpret_cast<const float *>(inputBuffer);
+
+ for (int32_t n = 0; n < len; ++n)
+ {
+ auto from = base + n;
+ auto into =
+ reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+
+ *into = *from;
+ }
+ }
+ else if (_tensorShape.rank() == 4)
+ {
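+    // Rank-4: copy through the feature reader/view helpers, which translate
+    // between the common layout and the backend layout.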
+ auto featureShape = _tensorShape.asFeature();
+
+ const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+ ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+ ::nnfw::util::feature::iterate(featureShape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+  }
+
+  return true;
+}
+
+void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
+ ::internal::cpu::Tensor *outputTensor,
+ const Shape &tensorShape)
+{
+ _inputTensor = inputTensor;
+ _outputTensor = outputTensor;
+ _tensorShape = tensorShape;
+}
+
+void TensorConvertFromCommonLayer::run() { convert(); }
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h
new file mode 100644
index 000000000..56f7bcf32
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "internal/Model.h"
+#include "internal/common/Tensor.h"
+#include "internal/cpu.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class TensorConvertFromCommonLayer : public ::arm_compute::IFunction
+{
+public:
+ TensorConvertFromCommonLayer() {}
+
+public:
+ bool convert();
+
+ void configure(::internal::common::Tensor *inputTensor, ::internal::cpu::Tensor *outputTensor,
+ const Shape &tensorShape);
+
+ void run();
+
+private:
+ ::internal::common::Tensor *_inputTensor;
+ ::internal::cpu::Tensor *_outputTensor;
+
+ Shape _tensorShape{1};
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+
+#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc
new file mode 100644
index 000000000..7d721f494
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#include "TensorConvertToCommonLayer.h"
+
+#include "internal/nnapi/feature/Reader.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+bool TensorConvertToCommonLayer::convert()
+{
+ auto inputBuffer = _inputTensor->buffer();
+ auto inputSize = _inputTensor->info()->total_size();
+
+ auto outputBuffer = _outputTensor->buffer();
+ auto outputSize = _outputTensor->info()->total_size();
+
+ if (_tensorShape.rank() == 2)
+ {
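+    // Same element-wise copy as the FromCommon direction, with source and
+    // destination roles swapped.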
+ const auto len = _tensorShape.dim(1);
+
+ auto base = reinterpret_cast<float *>(outputBuffer);
+
+ for (int32_t n = 0; n < len; ++n)
+ {
+ auto from = reinterpret_cast<const float *>(
+ _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+ auto into = base + n;
+
+ *into = *from;
+ }
+ }
+ else if (_tensorShape.rank() == 4)
+ {
+ auto featureShape = _tensorShape.asFeature();
+
+ const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+ ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+ ::nnfw::util::feature::iterate(featureShape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+  }
+
+  return true;
+}
+
+void TensorConvertToCommonLayer::configure(::internal::cpu::Tensor *inputTensor,
+ ::internal::common::Tensor *outputTensor,
+ const Shape &tensorShape)
+{
+ _inputTensor = inputTensor;
+ _outputTensor = outputTensor;
+ _tensorShape = tensorShape;
+}
+
+void TensorConvertToCommonLayer::run() { convert(); }
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h
new file mode 100644
index 000000000..7e96d1aff
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "internal/Model.h"
+#include "internal/common/Tensor.h"
+#include "internal/cpu.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class TensorConvertToCommonLayer : public ::arm_compute::IFunction
+{
+public:
+ TensorConvertToCommonLayer() {}
+
+public:
+ bool convert();
+
+ void configure(::internal::cpu::Tensor *inputTensor, ::internal::common::Tensor *outputTensor,
+ const Shape &tensorShape);
+
+ void run();
+
+private:
+ ::internal::cpu::Tensor *_inputTensor;
+ ::internal::common::Tensor *_outputTensor;
+
+ Shape _tensorShape{1};
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+
+#endif