summaryrefslogtreecommitdiff
path: root/runtime/neurun/backend/cpu/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/neurun/backend/cpu/kernel')
-rw-r--r--runtime/neurun/backend/cpu/kernel/AddLayer.cc101
-rw-r--r--runtime/neurun/backend/cpu/kernel/AddLayer.h77
-rw-r--r--runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc116
-rw-r--r--runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h85
-rw-r--r--runtime/neurun/backend/cpu/kernel/ConcatLayer.cc137
-rw-r--r--runtime/neurun/backend/cpu/kernel/ConcatLayer.h73
-rw-r--r--runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc140
-rw-r--r--runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h88
-rw-r--r--runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc143
-rw-r--r--runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h90
-rw-r--r--runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc119
-rw-r--r--runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h77
-rw-r--r--runtime/neurun/backend/cpu/kernel/GatherLayer.cc79
-rw-r--r--runtime/neurun/backend/cpu/kernel/GatherLayer.h74
-rw-r--r--runtime/neurun/backend/cpu/kernel/LogisticLayer.cc75
-rw-r--r--runtime/neurun/backend/cpu/kernel/LogisticLayer.h69
-rw-r--r--runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc116
-rw-r--r--runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h85
-rw-r--r--runtime/neurun/backend/cpu/kernel/MulLayer.cc101
-rw-r--r--runtime/neurun/backend/cpu/kernel/MulLayer.h77
-rw-r--r--runtime/neurun/backend/cpu/kernel/OperationUtils.cc273
-rw-r--r--runtime/neurun/backend/cpu/kernel/OperationUtils.h152
-rw-r--r--runtime/neurun/backend/cpu/kernel/PadLayer.cc76
-rw-r--r--runtime/neurun/backend/cpu/kernel/PadLayer.h75
-rw-r--r--runtime/neurun/backend/cpu/kernel/PermuteLayer.cc71
-rw-r--r--runtime/neurun/backend/cpu/kernel/PermuteLayer.h209
-rw-r--r--runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc54
-rw-r--r--runtime/neurun/backend/cpu/kernel/ReshapeLayer.h65
-rw-r--r--runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc172
-rw-r--r--runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h71
-rw-r--r--runtime/neurun/backend/cpu/kernel/SubLayer.cc100
-rw-r--r--runtime/neurun/backend/cpu/kernel/SubLayer.h77
32 files changed, 3317 insertions, 0 deletions
diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.cc b/runtime/neurun/backend/cpu/kernel/AddLayer.cc
new file mode 100644
index 000000000..8a2d872e5
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/AddLayer.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AddLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Element-wise float32 addition. Uses the slow broadcast kernel when the two
+// operand shapes differ; otherwise the regular (same-shape) cker kernel.
+void AddLayer::addFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+
+ // The op itself is passed in as a callable so the same cker entry point can
+ // serve add/sub/mul; here it is plain addition.
+ const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+ return a + b;
+ };
+
+ if (!HaveSameShapes(&_lhsDescr, &_rhsDescr))
+ {
+ // Shapes differ: take the broadcasting path (extended-rank shapes required).
+ nnfw::cker::BroadcastBinaryArithmeticOpSlow(
+ op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f,
+ convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr),
+ _outputData.f, fn);
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+ _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+ _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.f, fn);
+}
+
+// Quantized (uint8 asymmetric) addition — not implemented yet, always throws.
+// NOTE(review): the activation range below is computed and then discarded;
+// with the commented-out block it only triggers unused-variable warnings.
+// Consider deleting it until the cker quant8 kernel lands.
+void AddLayer::addQuant8()
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+ &output_activation_max);
+ // nnfw::cker::BinaryArithmeticOpParam op_params;
+ // op_params.quantized_activation_max = output_activation_max;
+ // op_params.quantized_activation_min = output_activation_min;
+
+ // cker quant8 add is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+// Stores operand pointers/descriptors for a later run(). The element type is
+// taken from the LHS descriptor; RHS is assumed to match (not checked here).
+void AddLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+ const TensorDescriptor &rhsDescr, const ir::Activation activation,
+ uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+ _lhsData.u8 = lhsData;
+ _lhsDescr = lhsDescr;
+ _rhsData.u8 = rhsData;
+ _rhsDescr = rhsDescr;
+ _inputType = lhsDescr.type;
+ _activation = activation;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+// Dispatches on the configured operand type.
+// NOTE(review): any type other than FLOAT32/QUANT8_ASYMM silently does
+// nothing — an `else throw std::runtime_error{...}` would surface misuse.
+void AddLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ addFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ addQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.h b/runtime/neurun/backend/cpu/kernel/AddLayer.h
new file mode 100644
index 000000000..7018e4c48
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/AddLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// CPU kernel implementing element-wise Add with an optional fused activation.
+// Lifecycle: construct, configure() once with operand buffers/descriptors,
+// then run() per execution. Non-owning: the raw buffers are owned elsewhere.
+class AddLayer : public ::neurun::exec::IFunction
+{
+public:
+ AddLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
+ {
+ // DO NOTHING
+ }
+
+public:
+ // Float32 path (supports broadcasting); see AddLayer.cc.
+ void addFloat32();
+
+ // Quantized path — currently throws "NYI".
+ void addQuant8();
+
+ // Records operand pointers and descriptors; element type is taken from lhsDescr.
+ void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+ const TensorDescriptor &rhsDescr, const ir::Activation activation,
+ uint8_t *outputData, const TensorDescriptor &outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ // Type-punned views of the externally owned operand buffers.
+ DataPtr _lhsData;
+ DataPtr _rhsData;
+ DataPtr _outputData;
+
+ TensorDescriptor _lhsDescr;
+ TensorDescriptor _rhsDescr;
+ TensorDescriptor _outputDescr;
+
+ ir::Activation _activation{ir::Activation::NONE};
+
+ OperandType _inputType{OperandType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc
new file mode 100644
index 000000000..389955796
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPoolLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/AveragePool.h>
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Shared cker PoolParams setup for both dtype paths.
+// NOTE(review): the (int8_t) casts narrow the uint32_t padding values; any
+// padding > 127 would wrap. cker's PaddingValues fields are wider than int8
+// upstream — confirm and drop the casts (or cast to the actual field type).
+#define AVGPOOLING_PARAMETERS \
+ nnfw::cker::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
+
+AvgPoolLayer::AvgPoolLayer()
+ : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _paddingLeft(0), _paddingTop(0),
+ _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0),
+ _kernelHeight(0), _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+// Float32 average pooling with fused-activation clamping.
+void AvgPoolLayer::averagePoolFloat32()
+{
+ AVGPOOLING_PARAMETERS
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::AveragePool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+}
+// Quantized (uint8) average pooling; clamp range derives from the output
+// descriptor's scale/offset.
+void AvgPoolLayer::averagePoolQuant8()
+{
+ AVGPOOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::AveragePool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
+}
+
+// Stores all pooling parameters for run().
+// NOTE(review): paddingRight/paddingBottom are stored but never forwarded to
+// cker (the macro passes only top/left) — presumably cker derives the rest
+// from shapes/strides; verify.
+void AvgPoolLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t kernelWidth, const uint32_t kernelHeight,
+ const ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _inputType = inputDescr.type;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _kernelWidth = kernelWidth;
+ _kernelHeight = kernelHeight;
+ _activation = activation;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+// Dispatches on dtype. NOTE(review): unsupported types silently no-op.
+void AvgPoolLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ averagePoolFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ averagePoolQuant8();
+ }
+}
+
+#undef AVGPOOLING_PARAMETERS
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h
new file mode 100644
index 000000000..6339efa41
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// CPU kernel for 2D average pooling (float32 and asymmetric-uint8 paths).
+// Non-owning over the input/output buffers; configure() must precede run().
+class AvgPoolLayer : public ::neurun::exec::IFunction
+{
+public:
+ AvgPoolLayer();
+
+public:
+ void averagePoolFloat32();
+
+ void averagePoolQuant8();
+
+ // Records buffers, padding, strides, kernel size and fused activation.
+ void configure(uint8_t *inputData, const TensorDescriptor inputDescr, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _outputData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _outputDescr;
+
+ // Explicit per-side padding (right/bottom currently unused by the .cc — see note there).
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _kernelWidth;
+ uint32_t _kernelHeight;
+
+ ir::Activation _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc b/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc
new file mode 100644
index 000000000..471c9b3bb
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Concatenation.h>
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+ConcatLayer::ConcatLayer()
+ : _inputDataPtrs(), _outputData(), _axis(0), _inputDescriptors(), _outputDescr(),
+ _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+// Float32 concatenation along _axis across all configured inputs.
+void ConcatLayer::concatenationFloat32()
+{
+ uint32_t num_inputs = _inputDescriptors.size();
+
+ nnfw::cker::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+
+ // inputDimsPtr holds pointers into inputDims; this is safe ONLY because
+ // reserve(num_inputs) precedes the push_backs (no reallocation). Keep the
+ // reserve if this is ever touched.
+ std::vector<nnfw::cker::Shape *> inputDimsPtr;
+ std::vector<nnfw::cker::Shape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ inputDims.push_back(convertTensorDescriptorToCkerShape(_inputDescriptors[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
+ }
+
+ std::vector<const float *> inputFloatPtrs;
+
+ for (auto ptr : _inputDataPtrs)
+ {
+ inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
+ }
+
+ nnfw::cker::Concatenation<float>(op_params, inputDimsPtr.data(), inputFloatPtrs.data(),
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+}
+// Quantized concatenation; passes per-input scale/zero-point so cker can
+// requantize each input into the output's quantization.
+void ConcatLayer::concatenationQuant8()
+{
+ uint32_t num_inputs = _inputDescriptors.size();
+
+ std::vector<int32_t> input_zeropoints(num_inputs);
+ std::vector<float> input_scales(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ input_zeropoints[i] = _inputDescriptors[i].offset;
+ input_scales[i] = _inputDescriptors[i].scale;
+ }
+
+ nnfw::cker::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+ op_params.input_zeropoint = input_zeropoints.data();
+ op_params.input_scale = input_scales.data();
+ op_params.output_zeropoint = _outputDescr.offset;
+ op_params.output_scale = _outputDescr.scale;
+
+ // Same pointer-into-reserved-vector pattern as the float path.
+ std::vector<nnfw::cker::Shape *> inputDimsPtr;
+ std::vector<nnfw::cker::Shape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ inputDims.push_back(convertTensorDescriptorToCkerShape(_inputDescriptors[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
+ }
+
+ nnfw::cker::Concatenation<uint8_t>(op_params, inputDimsPtr.data(), _inputDataPtrs.data(),
+ convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.u8);
+}
+
+// Records inputs, axis and output.
+// NOTE(review): _inputType is overwritten every loop iteration, so it ends up
+// as the LAST input's type — fine only if all inputs share one type; an
+// assertion would make that assumption explicit.
+void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs,
+ const std::vector<TensorDescriptor> &inputDescriptors, int32_t axis,
+ uint8_t *outputData, const TensorDescriptor outputDescr)
+{
+ _inputDataPtrs = inputDataPtrs;
+
+ for (auto inputDescr : inputDescriptors)
+ {
+ _inputDescriptors.emplace_back(inputDescr);
+ _inputType = inputDescr.type;
+ }
+
+ _axis = axis;
+
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+// Dispatches on dtype. NOTE(review): unsupported types silently no-op.
+void ConcatLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ concatenationFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ concatenationQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/ConcatLayer.h b/runtime/neurun/backend/cpu/kernel/ConcatLayer.h
new file mode 100644
index 000000000..048aa4208
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/ConcatLayer.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// CPU kernel concatenating N input tensors along a configured axis.
+// Non-owning over all buffers; configure() must precede run().
+class ConcatLayer : public ::neurun::exec::IFunction
+{
+public:
+ ConcatLayer();
+
+public:
+ void concatenationFloat32();
+
+ void concatenationQuant8();
+
+ // Records input pointers/descriptors, the concat axis, and the output.
+ void configure(const std::vector<const uint8_t *> &inputDataPtrs,
+ const std::vector<TensorDescriptor> &inputDescriptors, int32_t axis,
+ uint8_t *outputData, const TensorDescriptor outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ std::vector<const uint8_t *> _inputDataPtrs;
+ DataPtr _outputData;
+
+ int32_t _axis;
+
+ std::vector<TensorDescriptor> _inputDescriptors;
+ TensorDescriptor _outputDescr;
+
+ // Assumed common type of all inputs (set from the last descriptor in configure()).
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc
new file mode 100644
index 000000000..2fdb0baf7
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include <cker/operation/Conv.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+ConvolutionLayer::ConvolutionLayer()
+ : _inputData(), _kernelData(), _outputData(), _biasData(), _inputDescr(), _kernelDescr(),
+ _outputDescr(), _biasDescr(), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+ _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+// Float32 convolution (dilation fixed at 1) with fused-activation clamping.
+void ConvolutionLayer::convFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::ConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::Conv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
+ convertTensorDescriptorToCkerShape(_kernelDescr), _kernelData.f,
+ convertTensorDescriptorToCkerShape(_biasDescr), _biasData.f,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+}
+
+// Quantized (uint8) convolution: derives the fixed-point output multiplier /
+// shift from the input, kernel, bias and output quantization parameters.
+// Note the negated input/weights offsets — cker expects offsets to ADD.
+void ConvolutionLayer::convQuant8()
+{
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+ &output_activation_max);
+
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ GetQuantizedConvolutionMultiplier(_inputDescr, _kernelDescr, _biasDescr, _outputDescr,
+ &real_multiplier);
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ nnfw::cker::ConvParams op_params;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.input_offset = -_inputDescr.offset;
+ op_params.weights_offset = -_kernelDescr.offset;
+ op_params.output_offset = _outputDescr.offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::Conv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8,
+ convertTensorDescriptorToCkerShape(_kernelDescr), _kernelData.u8,
+ convertTensorDescriptorToCkerShape(_biasDescr), _biasData.i32,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
+}
+
+// Records all operands and hyper-parameters for run(). Right/bottom padding
+// is stored but not forwarded to cker (only left/top are) — see AvgPool note.
+void ConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
+ uint8_t *kernelData, const TensorDescriptor kernelDescr,
+ uint8_t *biasData, const TensorDescriptor biasDescr,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _inputType = inputDescr.type;
+ _kernelData.u8 = kernelData;
+ _kernelDescr = kernelDescr;
+ _biasData.u8 = biasData;
+ _biasDescr = biasDescr;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _activation = activation;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+// Dispatches on dtype. NOTE(review): unsupported types silently no-op.
+void ConvolutionLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ convQuant8();
+ }
+}
+
+// NOTE(review): leftover #undef — ANDROID_NN_CONV_PARAMETERS is never defined
+// in this file (copied from the AOSP NN runtime); safe but should be removed.
+#undef ANDROID_NN_CONV_PARAMETERS
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h
new file mode 100644
index 000000000..16669f316
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// CPU kernel for 2D convolution (float32 and asymmetric-uint8 paths).
+// Non-owning over input/kernel/bias/output buffers; configure() then run().
+class ConvolutionLayer : public ::neurun::exec::IFunction
+{
+public:
+ ConvolutionLayer();
+
+public:
+ void convFloat32();
+
+ void convQuant8();
+
+ // Records operands, per-side padding, strides and fused activation.
+ void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
+ const TensorDescriptor kernelDescr, uint8_t *biasData,
+ const TensorDescriptor biasDescr, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
+ const ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _kernelData;
+ DataPtr _outputData;
+ DataPtr _biasData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _kernelDescr;
+ TensorDescriptor _outputDescr;
+ TensorDescriptor _biasDescr;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+
+ ir::Activation _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc
new file mode 100644
index 000000000..e33e3465e
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include <cker/operation/DepthwiseConv.h>
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer()
+ : _inputData(), _kernelData(), _outputData(), _biasData(), _inputDescr(), _kernelDescr(),
+ _outputDescr(), _biasDescr(), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _multiplier(0),
+ _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+// Float32 depthwise convolution (dilation fixed at 1); _multiplier is the
+// depth multiplier (output channels per input channel).
+void DepthwiseConvolutionLayer::convFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.depth_multiplier = _multiplier;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::DepthwiseConv(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
+ _inputData.f, convertTensorDescriptorToCkerShape(_kernelDescr),
+ _kernelData.f, convertTensorDescriptorToCkerShape(_biasDescr),
+ _biasData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.f);
+}
+
+// Quantized (uint8) depthwise convolution; same requantization scheme as
+// ConvolutionLayer::convQuant8 (negated input/weight offsets, fixed-point
+// output multiplier + shift).
+void DepthwiseConvolutionLayer::convQuant8()
+{
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+ &output_activation_max);
+
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ GetQuantizedConvolutionMultiplier(_inputDescr, _kernelDescr, _biasDescr, _outputDescr,
+ &real_multiplier);
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.depth_multiplier = _multiplier;
+ op_params.input_offset = -_inputDescr.offset;
+ op_params.weights_offset = -_kernelDescr.offset;
+ op_params.output_offset = _outputDescr.offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::DepthwiseConv(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
+ _inputData.u8, convertTensorDescriptorToCkerShape(_kernelDescr),
+ _kernelData.u8, convertTensorDescriptorToCkerShape(_biasDescr),
+ _biasData.i32, convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.u8);
+}
+
+// Records operands and hyper-parameters. Right/bottom padding is stored but
+// not forwarded to cker (only left/top are) — same pattern as ConvolutionLayer.
+void DepthwiseConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
+ uint8_t *kernelData, const TensorDescriptor kernelDescr,
+ uint8_t *biasData, const TensorDescriptor biasDescr,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t multiplier,
+ const ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _inputType = inputDescr.type;
+ _kernelData.u8 = kernelData;
+ _kernelDescr = kernelDescr;
+ _biasData.u8 = biasData;
+ _biasDescr = biasDescr;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _multiplier = multiplier;
+ _activation = activation;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+// Dispatches on dtype. NOTE(review): unsupported types silently no-op.
+void DepthwiseConvolutionLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ convQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..575cc0ab1
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_DEPTHWISECONVOLUTIONLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_DEPTHWISECONVOLUTIONLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class DepthwiseConvolutionLayer : public ::neurun::exec::IFunction
+{
+public:
+ DepthwiseConvolutionLayer();
+
+public:
+ void convFloat32();
+
+ void convQuant8();
+
+ void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
+ const TensorDescriptor kernelDescr, uint8_t *biasData,
+ const TensorDescriptor biasDescr, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
+ const uint32_t multiplier, const ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _kernelData;
+ DataPtr _outputData;
+ DataPtr _biasData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _kernelDescr;
+ TensorDescriptor _outputDescr;
+ TensorDescriptor _biasDescr;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+
+ uint32_t _multiplier;
+
+ ir::Activation _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_DEPTHWISECONVOLUTIONLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc
new file mode 100644
index 000000000..055f71590
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include <cker/operation/FullyConnected.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _inputData(), _weightsData(), _biasData(), _outputData(), _inputDescr(), _weightsDescr(),
+ _biasDescr(), _outputDescr(), _activation(ir::Activation::NONE),
+ _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+void FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::FullyConnected(op_params, convertToExtendedCkerShape(_inputDescr), _inputData.f,
+ convertToExtendedCkerShape(_weightsDescr), _weightsData.f,
+ convertToExtendedCkerShape(_biasDescr), _biasData.f,
+ convertToExtendedCkerShape(_outputDescr), _outputData.f);
+}
+
+// executionMutex is used to protect concurrent access of non-threadsafe resources
+// like gemmlowp::GemmContext.
+void FullyConnectedLayer::fullyConnectedQuant8()
+{
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ GetQuantizedConvolutionMultiplier(_inputDescr, _weightsDescr, _biasDescr, _outputDescr,
+ &real_multiplier);
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+ &output_activation_max);
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.input_offset = -_inputDescr.offset;
+ op_params.weights_offset = -_weightsDescr.offset;
+ op_params.output_offset = _outputDescr.offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::FullyConnected(op_params, convertToExtendedCkerShape(_inputDescr), _inputData.u8,
+ convertToExtendedCkerShape(_weightsDescr), _weightsData.u8,
+ convertToExtendedCkerShape(_biasDescr), _biasData.i32,
+ convertToExtendedCkerShape(_outputDescr), _outputData.u8);
+}
+
+void FullyConnectedLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
+ uint8_t *weightsData, const TensorDescriptor weightsDescr,
+ uint8_t *biasData, const TensorDescriptor biasDescr,
+ ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _inputType = inputDescr.type;
+ _weightsData.u8 = weightsData;
+ _weightsDescr = weightsDescr;
+ _biasData.u8 = biasData;
+ _biasDescr = biasDescr;
+ _activation = activation;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ fullyConnectedFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ fullyConnectedQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h
new file mode 100644
index 000000000..9fdc393a4
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class FullyConnectedLayer : public ::neurun::exec::IFunction
+{
+public:
+ FullyConnectedLayer();
+
+public:
+ void fullyConnectedFloat32();
+
+ void fullyConnectedQuant8();
+
+ void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *weightsData,
+ const TensorDescriptor weightsDescr, uint8_t *biasData,
+ const TensorDescriptor biasDescr, ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _weightsData;
+ DataPtr _biasData;
+ DataPtr _outputData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _weightsDescr;
+ TensorDescriptor _biasDescr;
+ TensorDescriptor _outputDescr;
+
+ ir::Activation _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/GatherLayer.cc b/runtime/neurun/backend/cpu/kernel/GatherLayer.cc
new file mode 100644
index 000000000..b29acba79
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/GatherLayer.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GatherLayer.h"
+
+#include <cker/operation/Gather.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void GatherLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
+ uint8_t *indicesData, const TensorDescriptor &indicesDescr,
+ uint8_t *outputData, const TensorDescriptor &outputDescr, int32_t axis)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _indicesData.u8 = indicesData;
+ _indicesDescr = indicesDescr;
+ _axis = axis;
+ _inputType = inputDescr.type;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+void GatherLayer::run()
+{
+ nnfw::cker::GatherParams op_params;
+ op_params.axis = _axis;
+
+ switch (_inputType)
+ {
+ case OperandType::FLOAT32:
+ nnfw::cker::Gather<float>(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
+ _inputData.f, convertTensorDescriptorToCkerShape(_indicesDescr),
+ _indicesData.i32, convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.f);
+ break;
+ case OperandType::QUANT8_ASYMM:
+ nnfw::cker::Gather<uint8_t>(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
+ _inputData.u8, convertTensorDescriptorToCkerShape(_indicesDescr),
+ _indicesData.i32,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
+ break;
+ case OperandType::INT32:
+ nnfw::cker::Gather<int32_t>(
+ op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.i32,
+ convertTensorDescriptorToCkerShape(_indicesDescr), _indicesData.i32,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.i32);
+ break;
+ default:
+ throw std::runtime_error("Gather NYI for this operand type!");
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/GatherLayer.h b/runtime/neurun/backend/cpu/kernel/GatherLayer.h
new file mode 100644
index 000000000..af4f8b8f6
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/GatherLayer.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class GatherLayer : public ::neurun::exec::IFunction
+{
+public:
+ GatherLayer()
+ : _inputData{nullptr}, _indicesData{nullptr}, _outputData{nullptr}, _axis{-1},
+ _inputType{OperandType::FLOAT32}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *indicesData,
+ const TensorDescriptor &indicesDescr, uint8_t *outputData,
+ const TensorDescriptor &outputDescr, int32_t axis);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _indicesData;
+ DataPtr _outputData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _indicesDescr;
+ TensorDescriptor _outputDescr;
+
+ int32_t _axis;
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc b/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc
new file mode 100644
index 000000000..d9916964e
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogisticLayer.h"
+
+#include <cker/operation/Logistic.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+LogisticLayer::LogisticLayer()
+ : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+void LogisticLayer::logisticFloat32()
+{
+ nnfw::cker::Logistic(convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+}
+
+void LogisticLayer::logisticQuant8()
+{
+ // cker quant8 logistic is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+void LogisticLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
+ uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _inputType = inputDescr.type;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+void LogisticLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ logisticFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ logisticQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/LogisticLayer.h b/runtime/neurun/backend/cpu/kernel/LogisticLayer.h
new file mode 100644
index 000000000..33fcd6fed
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/LogisticLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class LogisticLayer : public ::neurun::exec::IFunction
+{
+public:
+ LogisticLayer();
+
+public:
+ void logisticFloat32();
+
+ void logisticQuant8();
+
+ void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *outputData,
+ const TensorDescriptor &outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _outputData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _outputDescr;
+
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc
new file mode 100644
index 000000000..095cd6d1d
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPoolLayer.h"
+
+#include <cker/operation/MaxPool.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+#define MAXPOOLING_PARAMETERS \
+ nnfw::cker::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
+
+MaxPoolLayer::MaxPoolLayer()
+ : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _paddingLeft(0), _paddingTop(0),
+ _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0),
+ _kernelHeight(0), _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+void MaxPoolLayer::maxPoolFloat32()
+{
+ MAXPOOLING_PARAMETERS
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::MaxPool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+}
+void MaxPoolLayer::maxPoolQuant8()
+{
+ MAXPOOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::MaxPool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
+}
+
+void MaxPoolLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t kernelWidth, const uint32_t kernelHeight,
+ const ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr)
+{
+ _inputData.u8 = inputData;
+
+ _inputDescr = inputDescr;
+ _inputType = inputDescr.type;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _kernelWidth = kernelWidth;
+ _kernelHeight = kernelHeight;
+ _activation = activation;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+void MaxPoolLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ maxPoolFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ maxPoolQuant8();
+ }
+}
+
+#undef MAXPOOLING_PARAMETERS
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h
new file mode 100644
index 000000000..88a574c42
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class MaxPoolLayer : public ::neurun::exec::IFunction
+{
+public:
+ MaxPoolLayer();
+
+public:
+ void maxPoolFloat32();
+
+ void maxPoolQuant8();
+
+ void configure(uint8_t *inputData, const TensorDescriptor inputDescr, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation, uint8_t *outputData,
+ const TensorDescriptor outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _outputData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _outputDescr;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _kernelWidth;
+ uint32_t _kernelHeight;
+
+ ir::Activation _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.cc b/runtime/neurun/backend/cpu/kernel/MulLayer.cc
new file mode 100644
index 000000000..d6ce2cfad
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/MulLayer.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MulLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void MulLayer::mulFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+
+ const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+ return a * b;
+ };
+
+ if (!HaveSameShapes(&_lhsDescr, &_rhsDescr))
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOpSlow(
+ op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f,
+ convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr),
+ _outputData.f, fn);
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+ _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+ _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.f, fn);
+}
+
+void MulLayer::mulQuant8()
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+ &output_activation_max);
+ // nnfw::cker::BinaryArithmeticOpParam op_params;
+ // op_params.quantized_activation_max = output_activation_max;
+ // op_params.quantized_activation_min = output_activation_min;
+
+ // cker quant8 mul is not implemented yet
+ throw std::runtime_error{"Mul NYI for quantized"};
+}
+
+void MulLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+ const TensorDescriptor &rhsDescr, const ir::Activation activation,
+ uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+ _lhsData.u8 = lhsData;
+ _lhsDescr = lhsDescr;
+ _rhsData.u8 = rhsData;
+ _rhsDescr = rhsDescr;
+ _inputType = lhsDescr.type;
+ _activation = activation;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+void MulLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ mulFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ mulQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.h b/runtime/neurun/backend/cpu/kernel/MulLayer.h
new file mode 100644
index 000000000..05fc3052f
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/MulLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class MulLayer : public ::neurun::exec::IFunction
+{
+public:
+ MulLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
+ {
+ // DO NOTHING
+ }
+
+public:
+ void mulFloat32();
+
+ void mulQuant8();
+
+ void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+ const TensorDescriptor &rhsDescr, const ir::Activation activation,
+ uint8_t *outputData, const TensorDescriptor &outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _lhsData;
+ DataPtr _rhsData;
+ DataPtr _outputData;
+
+ TensorDescriptor _lhsDescr;
+ TensorDescriptor _rhsDescr;
+ TensorDescriptor _outputDescr;
+
+ ir::Activation _activation{ir::Activation::NONE};
+
+ OperandType _inputType{OperandType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/OperationUtils.cc b/runtime/neurun/backend/cpu/kernel/OperationUtils.cc
new file mode 100644
index 000000000..8aa15dcbd
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/OperationUtils.cc
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+
+#include <cmath>
+#include <algorithm>
+#include <cassert>
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+uint32_t getNumberOfDimensions(const TensorDescriptor &descr) { return descr.dimensions.size(); }
+
+uint32_t getNumberOfElements(const TensorDescriptor &descr)
+{
+ uint32_t count = 1;
+ for (size_t i = 0; i < descr.dimensions.size(); i++)
+ {
+ count *= descr.dimensions[i];
+ }
+ return count;
+}
+
+uint32_t getSizeOfDimension(const TensorDescriptor &descr, uint32_t dimensionIdx)
+{
+ if (dimensionIdx >= descr.dimensions.size())
+ {
+ // TODO, log the error
+ return 0;
+ }
+ return descr.dimensions[dimensionIdx];
+}
+
+void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+ if (double_multiplier == 0.)
+ {
+ *quantized_multiplier = 0;
+ *shift = 0;
+ return;
+ }
+ const double q = std::frexp(double_multiplier, shift);
+ auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ ++*shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void GetQuantizedConvolutionMultiplier(const TensorDescriptor &inputDescr,
+ const TensorDescriptor &filterDescr,
+ const TensorDescriptor &biasDescr,
+ const TensorDescriptor &outputDescr, float *multiplier)
+{
+ const float input_product_scale = inputDescr.scale * filterDescr.scale;
+ const float bias_scale = biasDescr.scale;
+ const float output_scale = outputDescr.scale;
+ // The following conditions must be guaranteed by the training pipeline.
+ UNUSED_RELEASE(bias_scale);
+ assert(std::abs(input_product_scale - bias_scale) <=
+ 1e-6 * std::min(input_product_scale, bias_scale));
+ assert(input_product_scale >= 0);
+ assert(input_product_scale < output_scale);
+ *multiplier = input_product_scale / output_scale;
+}
+
+void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier > 1.);
+ const double q = std::frexp(double_multiplier, left_shift);
+ int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ ++*left_shift;
+ }
+ assert(*left_shift >= 0);
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min,
+ float *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0.f;
+ *activation_max = std::numeric_limits<float>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0.f;
+ *activation_max = 6.f;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1.f;
+ *activation_max = 1.f;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0.f;
+ *activation_max = 1.f;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<float>::lowest();
+ *activation_max = std::numeric_limits<float>::max();
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+void CalculateActivationRangeUint8(ir::Activation activation, const TensorDescriptor &outputDescr,
+ int32_t *act_min, int32_t *act_max)
+{
+ const int32_t qmin = std::numeric_limits<uint8_t>::min();
+ const int32_t qmax = std::numeric_limits<uint8_t>::max();
+ const auto scale = outputDescr.scale;
+ const auto zero_point = outputDescr.offset;
+ auto quantize = [scale, zero_point](float f) {
+ return zero_point + static_cast<int32_t>(std::round(f / scale));
+ };
+ if (activation == ir::Activation::RELU)
+ {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = qmax;
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = std::min(qmax, quantize(6.0));
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *act_min = std::max(qmin, quantize(-1.0));
+ *act_max = std::min(qmax, quantize(1.0));
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = std::min(qmax, quantize(1.0));
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *act_min = qmin;
+ *act_max = qmax;
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+bool HaveSameShapes(const TensorDescriptor *input1, const TensorDescriptor *input2)
+{
+ if (input1 == input2)
+ return true;
+  if (input1 == NULL || input2 == NULL)
+ return false;
+
+ if (input1 == NULL)
+ {
+ return (getNumberOfDimensions(*input2) == 0);
+ }
+
+ if (getNumberOfDimensions(*input1) != getNumberOfDimensions(*input2))
+ return false;
+
+ for (uint32_t i = 0; i < getNumberOfDimensions(*input1); i++)
+ if (input1->dimensions[i] != input2->dimensions[i])
+ return false;
+
+ return true;
+}
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
+{
+ const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
+ (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
+ // Tighten bound using floor. Suppose that we could use the exact value.
+ // After scaling the difference, the result would be at the maximum. Thus we
+ // must ensure that our value has lower magnitude.
+ return static_cast<int32_t>(std::floor(max_input_rescaled));
+}
+
+TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout)
+{
+ TensorDescriptor descriptor;
+
+ auto dims = o.shape().dims();
+ if (frontend_layout == ir::Layout::NCHW && o.shape().rank() == 4)
+ {
+ // NCHW -> NHWC
+ uint32_t permutation[4] = {0, 2, 3, 1};
+ for (int i = 0; i < o.shape().rank(); ++i)
+ {
+ dims.at(i) = o.shape().dim(permutation[i]);
+ }
+ }
+ descriptor.dimensions = std::vector<uint32_t>(dims.begin(), dims.end());
+ descriptor.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type()));
+ descriptor.scale = o.typeInfo().scale();
+ descriptor.offset = o.typeInfo().offset();
+
+  // The CPU backend assumes that neurun's internal shape rank is always at most 4
+ assert(descriptor.dimensions.size() <= 4);
+
+ return descriptor;
+}
+
+uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions)
+{
+ uint32_t size = 4;
+
+ switch (type)
+ {
+ case OperandType::FLOAT32:
+ case OperandType::INT32:
+ case OperandType::UINT32:
+ size = 4;
+ break;
+ case OperandType::BOOL8:
+ case OperandType::QUANT8_ASYMM:
+ case OperandType::QUANT8_SYMM:
+ size = 1;
+ break;
+ default:
+ throw std::runtime_error("Not supported operand type.");
+ break;
+ }
+
+ for (auto d : dimensions)
+ {
+ size *= d;
+ }
+
+ return size;
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/OperationUtils.h b/runtime/neurun/backend/cpu/kernel/OperationUtils.h
new file mode 100644
index 000000000..b9e8c8974
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/OperationUtils.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
+#define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
+
+#include <iostream>
+#include <limits>
+#include <vector>
+
+#include <cker/Shape.h>
+
+#include "ir/Operand.h"
+#include "ir/DataType.h"
+#include <ir/InternalType.h>
+
+using OperandType = neurun::ir::DataType;
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+struct TensorDescriptor
+{
+ OperandType type;
+ std::vector<uint32_t> dimensions;
+ float scale;
+ int32_t offset;
+};
+
+union DataPtr {
+ uint8_t *u8;
+ int8_t *i8;
+ int32_t *i32;
+ float *f;
+ void *v;
+};
+
+uint32_t getNumberOfDimensions(const TensorDescriptor &descr);
+
+uint32_t getNumberOfElements(const TensorDescriptor &descr);
+
+uint32_t getSizeOfDimension(const TensorDescriptor &descr, uint32_t dimensionIdx);
+
+inline nnfw::cker::Shape convertToExtendedCkerShape(const TensorDescriptor &descr)
+{
+ std::vector<int32_t> raw_shape;
+ raw_shape.resize(4);
+
+ uint32_t src = 4 - descr.dimensions.size();
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ if (i < src)
+ {
+ raw_shape[i] = 1;
+ }
+ else
+ {
+ raw_shape[i] = descr.dimensions[i - src];
+ }
+ }
+
+ return nnfw::cker::GetShape(raw_shape);
+}
+
+inline nnfw::cker::Shape convertTensorDescriptorToCkerShape(const TensorDescriptor &descr)
+{
+ std::vector<int32_t> raw_shape;
+ raw_shape.resize(4);
+
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ if (i >= descr.dimensions.size())
+ {
+ raw_shape[i] = 1;
+ }
+ else
+ {
+ raw_shape[i] = descr.dimensions[i];
+ }
+ }
+
+ return nnfw::cker::GetShape(raw_shape);
+}
+
+inline int32_t getAxis(uint32_t rank, int32_t axis, ir::Layout frontend_layout)
+{
+ auto ret = axis;
+
+ if (axis < 0)
+ {
+ ret += rank;
+ }
+
+ // NCHW -> NHWC
+ if (frontend_layout == ir::Layout::NCHW)
+ {
+ int32_t permutation[4] = {0, 3, 1, 2};
+ ret = permutation[ret];
+ }
+
+ return ret;
+}
+
+void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
+void GetQuantizedConvolutionMultiplier(const TensorDescriptor &inputDescr,
+ const TensorDescriptor &filterDescr,
+ const TensorDescriptor &biasDescr,
+ const TensorDescriptor &outputDescr, float *multiplier);
+
+void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min,
+ float *activation_max);
+
+void CalculateActivationRangeUint8(ir::Activation activation, const TensorDescriptor &outputDescr,
+ int32_t *act_min, int32_t *act_max);
+
+bool HaveSameShapes(const TensorDescriptor *input1, const TensorDescriptor *input2);
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
+
+TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout);
+
+uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions);
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
diff --git a/runtime/neurun/backend/cpu/kernel/PadLayer.cc b/runtime/neurun/backend/cpu/kernel/PadLayer.cc
new file mode 100644
index 000000000..1fd9429b5
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/PadLayer.cc
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Pad.h>
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+PadLayer::PadLayer()
+ : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _padData(), _padRank(),
+ _constantValueData(), _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+void PadLayer::padFloat32()
+{
+ nnfw::cker::Pad(_padData, _padRank, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f,
+ _constantValueData.f);
+}
+void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); }
+
+void PadLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *outputData,
+ const TensorDescriptor outputDescr, const int32_t *padData,
+ int32_t padRank, uint8_t *constantValueData)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _inputType = inputDescr.type;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+ _padData = padData;
+ _padRank = padRank;
+ _constantValueData.u8 = constantValueData;
+}
+
+void PadLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ padFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ padQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/PadLayer.h b/runtime/neurun/backend/cpu/kernel/PadLayer.h
new file mode 100644
index 000000000..f4413a8ed
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/PadLayer.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and `SYMMETRIC`
+class PadLayer : public ::neurun::exec::IFunction
+{
+public:
+ PadLayer();
+
+public:
+ void padFloat32();
+
+ void padQuant8();
+
+ void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *outputData,
+ const TensorDescriptor outputDescr, const int32_t *padData, int32_t padRank,
+ uint8_t *constantValueData = nullptr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _outputData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _outputDescr;
+
+ const int32_t *_padData;
+ int32_t _padRank;
+ DataPtr _constantValueData;
+
+ OperandType _inputType;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc b/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc
new file mode 100644
index 000000000..6f28d8436
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+using Type = ir::operation::Permute::Type;
+
+void PermuteLayer::configure(std::shared_ptr<backend::operand::ITensor> input,
+ std::shared_ptr<backend::operand::ITensor> output,
+ const ir::Shape &output_shape, Type type, ir::DataType dataType)
+{
+ _input = input;
+ _output = output;
+ _output_shape = output_shape;
+ _type = type;
+ _dataType = dataType;
+}
+
+void PermuteLayer::run()
+{
+ using ir::DataType;
+ switch (_dataType)
+ {
+ case DataType::FLOAT32:
+ runTempl<float>();
+ break;
+ case DataType::INT32:
+ runTempl<int32_t>();
+ break;
+ case DataType::UINT32:
+ runTempl<uint32_t>();
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT8_ASYMM:
+ runTempl<uint8_t>();
+ break;
+ case DataType::QUANT8_SYMM:
+ runTempl<int8_t>();
+ break;
+ default:
+ throw std::runtime_error("NYI");
+ break;
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/PermuteLayer.h b/runtime/neurun/backend/cpu/kernel/PermuteLayer.h
new file mode 100644
index 000000000..1f9110807
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/PermuteLayer.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "util/feature/nhwc/View.h"
+#include "OperationUtils.h"
+#include "ir/operation/Permute.h"
+#include "util/feature/nhwc/Reader.h"
+#include "util/feature/nchw/View.h"
+#include "util/Coordinates.h"
+
+#include <misc/feature/IndexIterator.h>
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class PermuteLayer : public ::neurun::exec::IFunction
+{
+public:
+ PermuteLayer() = default;
+
+public:
+ void configure(std::shared_ptr<backend::operand::ITensor> input,
+ std::shared_ptr<backend::operand::ITensor> output, const ir::Shape &output_shape,
+ ir::operation::Permute::Type type, ir::DataType dataType);
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ template <class T> void runTempl()
+ {
+ auto rank = _output_shape.rank();
+ auto fn = [&](::neurun::backend::operand::ITensor &in_tensor) {
+ _output->access([&](::neurun::backend::operand::ITensor &out_tensor) {
+ auto input_buffer = in_tensor.buffer();
+ auto input_size = in_tensor.total_size();
+ auto output_buffer = out_tensor.buffer();
+ if (_type == ir::operation::Permute::Type::COPY)
+ {
+ assert(in_tensor.layout() == out_tensor.layout());
+ if (!in_tensor.has_padding() && !out_tensor.has_padding())
+ {
+ assert(input_size == out_tensor.total_size());
+ memcpy(output_buffer, input_buffer, input_size);
+ return;
+ }
+ }
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ const int32_t copy_len = _output_shape.dim(0);
+
+ memcpy(output_buffer, input_buffer, copy_len);
+ break;
+ }
+ case 2:
+ {
+ const int32_t copy_len = _output_shape.dim(1);
+
+ for (auto i = 0; i < _output_shape.dim(0); ++i)
+ {
+ neurun::util::Coordinates coords{i, 0};
+ memcpy(output_buffer + out_tensor.calcOffset(coords),
+ input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
+ }
+ break;
+ }
+ case 3:
+ {
+ const int32_t copy_len = _output_shape.dim(2);
+
+ for (auto i = 0; i < _output_shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < _output_shape.dim(1); ++j)
+ {
+ neurun::util::Coordinates coords{i, j, 0};
+ memcpy(output_buffer + out_tensor.calcOffset(coords),
+ input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ // TODO Unify permute type and remove switch case
+ switch (_type)
+ {
+ case ir::operation::Permute::Type::NHWC_TO_NCHW:
+ {
+ for (auto n = 0; n < _output_shape.dim(0); ++n)
+ {
+ for (auto c = 0; c < _output_shape.dim(1); ++c)
+ {
+ for (auto h = 0; h < _output_shape.dim(2); ++h)
+ {
+ for (auto w = 0; w < _output_shape.dim(3); ++w)
+ {
+ const neurun::util::Coordinates in_coords{n, h, w, c};
+ const auto out_coords =
+ convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout());
+ const auto value =
+ *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords));
+ *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) =
+ value;
+ }
+ }
+ }
+ }
+ break;
+ }
+ case ir::operation::Permute::Type::NCHW_TO_NHWC:
+ {
+ for (auto n = 0; n < _output_shape.dim(0); ++n)
+ {
+ for (auto h = 0; h < _output_shape.dim(1); ++h)
+ {
+ for (auto w = 0; w < _output_shape.dim(2); ++w)
+ {
+ for (auto c = 0; c < _output_shape.dim(3); ++c)
+ {
+ const neurun::util::Coordinates in_coords{n, c, h, w};
+ const auto out_coords =
+ convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout());
+ const auto value =
+ *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords));
+ *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) =
+ value;
+ }
+ }
+ }
+ }
+ break;
+ }
+ case ir::operation::Permute::Type::COPY:
+ {
+ const int32_t copy_len = _output_shape.dim(3);
+
+ for (auto i = 0; i < _output_shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < _output_shape.dim(1); ++j)
+ {
+ for (auto k = 0; k < _output_shape.dim(2); ++k)
+ {
+ neurun::util::Coordinates coords{i, j, k, 0};
+ memcpy(output_buffer + out_tensor.calcOffset(coords),
+ input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
+ }
+ }
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI");
+ break;
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI");
+ break;
+ }
+ });
+ };
+ _input->access(fn);
+ }
+
+private:
+ std::shared_ptr<backend::operand::ITensor> _input{nullptr};
+ std::shared_ptr<backend::operand::ITensor> _output{nullptr};
+ ir::Shape _output_shape{};
+ ir::operation::Permute::Type _type{ir::operation::Permute::Type::COPY};
+ ir::DataType _dataType{ir::DataType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc
new file mode 100644
index 000000000..caeee9f12
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReshapeLayer.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+ReshapeLayer::ReshapeLayer() : _inputData(), _outputData(), _inputDescr(), _outputDescr()
+{
+ // DO NOTHING
+}
+
+void ReshapeLayer::reshapeGeneric()
+{
+ size_t count = sizeOfData(_inputDescr.type, _inputDescr.dimensions);
+ memcpy(_outputData.v, _inputData.v, count);
+}
+
+void ReshapeLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
+ uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+void ReshapeLayer::run() { reshapeGeneric(); }
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h
new file mode 100644
index 000000000..25dd851b2
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class ReshapeLayer : public ::neurun::exec::IFunction
+{
+public:
+ ReshapeLayer();
+
+public:
+ void reshapeGeneric();
+
+ void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *outputData,
+ const TensorDescriptor &outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _inputData;
+ DataPtr _outputData;
+
+ TensorDescriptor _inputDescr;
+ TensorDescriptor _outputDescr;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc
new file mode 100644
index 000000000..58ba109b4
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SoftMaxLayer.h"
+
+#include <cker/operation/SoftMax.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+SoftMaxLayer::SoftMaxLayer()
+ : _inputData(), _outputData(), _beta(0.0), _inputDescr(), _outputDescr(),
+ _inputType(OperandType::FLOAT32)
+{
+ // DO NOTHING
+}
+
+// Performs softmax along the input of size (input_size * batch_size).
+void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+ float *out)
+{
+ assert(input_size > 0);
+
+ // For each batch
+ for (int b = 0; b < batch_size; b++)
+ {
+ // Find the max coeff.
+ float max_coeff = in[0];
+ for (int i = 1; i < input_size; i++)
+ {
+ if (in[i] > max_coeff)
+ max_coeff = in[i];
+ }
+
+ // Compute the normalized sum of exps.
+ float exp_sum = 0.0;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] = std::exp((in[i] - max_coeff) * beta);
+ exp_sum += out[i];
+ }
+
+ // Divide by the sum of exps.
+ float reciprocal_sum_exp = 1.f / exp_sum;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] *= reciprocal_sum_exp;
+ }
+
+ // Advance in and out pointers for the next batch.
+ in += input_size;
+ out += input_size;
+ }
+}
+
+void SoftMaxLayer::softmaxFloat32()
+{
+ TensorDescriptor descrIn4D;
+
+ if (getNumberOfDimensions(_inputDescr) == 2)
+ {
+ uint32_t batch_size = getSizeOfDimension(_inputDescr, 0);
+ if (batch_size == 0)
+ throw std::runtime_error("batch_size should not be 0");
+
+ uint32_t input_size = getNumberOfElements(_inputDescr) / batch_size;
+ Softmax(_inputData.f, input_size, batch_size, _beta, _outputData.f);
+ }
+ else if (getNumberOfDimensions(_inputDescr) == 4)
+ {
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ nnfw::cker::Softmax(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
+ convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+ }
+ else
+ {
+ throw std::runtime_error{"only 2D and 4D tensors supported"};
+ }
+}
+
+void SoftMaxLayer::softmaxQuant8()
+{
+ TensorDescriptor descrIn4D = _inputDescr;
+
+ if (getNumberOfDimensions(_inputDescr) == 2)
+ {
+ uint32_t batch_size = getSizeOfDimension(_inputDescr, 0);
+ if (batch_size == 0)
+ throw std::runtime_error("batch_size should not be 0");
+
+ uint32_t input_size = getNumberOfElements(_inputDescr) / batch_size;
+ descrIn4D.dimensions = {batch_size, 1, 1, input_size};
+ }
+ else if (getNumberOfDimensions(_inputDescr) == 4)
+ {
+ descrIn4D = _inputDescr;
+ }
+ else
+ {
+ throw std::runtime_error{"only 2D and 4D tensors supported"};
+ }
+ if (_outputDescr.offset != 0 || _outputDescr.scale != 1.f / 256)
+ {
+ throw std::runtime_error{"incorrect scale / offset for output"};
+ }
+ static const int32_t kScaledDiffIntegerBits = 5;
+ const double input_beta_real_multiplier = std::min(
+ 1.0 * _beta * _inputDescr.scale * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
+ int32_t input_multiplier = 0;
+ int32_t input_left_shift = 0;
+ QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
+ &input_left_shift);
+ float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
+
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.input_multiplier = input_multiplier;
+ op_params.input_left_shift = input_left_shift;
+ op_params.diff_min = diff_min;
+ nnfw::cker::Softmax(op_params, convertTensorDescriptorToCkerShape(descrIn4D), _inputData.u8,
+ convertTensorDescriptorToCkerShape(descrIn4D), _outputData.u8);
+}
+
+void SoftMaxLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
+ const float beta, uint8_t *outputData,
+ const TensorDescriptor &outputDescr)
+{
+ _inputData.u8 = inputData;
+ _inputDescr = inputDescr;
+ _inputType = inputDescr.type;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+ _beta = beta;
+}
+
+void SoftMaxLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ softmaxFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ softmaxQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h
new file mode 100644
index 000000000..4723afb72
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Softmax activation kernel for the CPU backend.
+// Supports FLOAT32 and QUANT8_ASYMM inputs; the path is selected in run()
+// from the input operand type recorded by configure().
+class SoftMaxLayer : public ::neurun::exec::IFunction
+{
+public:
+  SoftMaxLayer();
+
+public:
+  // float32 path
+  void softmaxFloat32();
+
+  // quantized (asymmetric uint8) path
+  void softmaxQuant8();
+
+  // Record input/output buffers, descriptors and the beta scaling factor.
+  void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, const float beta,
+                 uint8_t *outputData, const TensorDescriptor &outputDescr);
+
+  void run() override;
+  void runSync() override
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  DataPtr _inputData;
+  DataPtr _outputData;
+
+  // In-class defaults guard against use before configure() (mirrors SubLayer.h).
+  float _beta{0.0f};
+
+  TensorDescriptor _inputDescr;
+  TensorDescriptor _outputDescr;
+
+  OperandType _inputType{OperandType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.cc b/runtime/neurun/backend/cpu/kernel/SubLayer.cc
new file mode 100644
index 000000000..c6f7188e0
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/SubLayer.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SubLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Element-wise float32 subtraction (lhs - rhs) with the fused activation
+// applied as a clamp on the result.
+void SubLayer::subFloat32()
+{
+  // Clamp bounds derived from the fused activation.
+  float act_min, act_max;
+  CalculateActivationRangeFloat(_activation, &act_min, &act_max);
+
+  nnfw::cker::BinaryArithmeticOpParam params;
+  params.float_activation_min = act_min;
+  params.float_activation_max = act_max;
+
+  const std::function<float(const float &, const float &)> sub_fn =
+      [](const float &lhs, const float &rhs) { return lhs - rhs; };
+
+  if (HaveSameShapes(&_lhsDescr, &_rhsDescr))
+  {
+    nnfw::cker::BinaryArithmeticOp(params, convertTensorDescriptorToCkerShape(_lhsDescr),
+                                   _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+                                   _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+                                   _outputData.f, sub_fn);
+  }
+  else
+  {
+    // Shapes differ: fall back to the (slower) broadcasting kernel.
+    nnfw::cker::BroadcastBinaryArithmeticOpSlow(
+        params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f,
+        convertToExtendedCkerShape(_rhsDescr), _rhsData.f,
+        convertToExtendedCkerShape(_outputDescr), _outputData.f, sub_fn);
+  }
+}
+
+// Quantized (asymmetric uint8) subtraction — not implemented yet.
+// The activation range is computed up front so the commented-out param
+// block below can be enabled as-is once the cker kernel lands.
+void SubLayer::subQuant8()
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+                                &output_activation_max);
+  // nnfw::cker::SubParam op_params;
+  // op_params.quantized_activation_max = output_activation_max;
+  // op_params.quantized_activation_min = output_activation_min;
+
+  // cker quant8 sub is not implemented yet
+  throw std::runtime_error{"NYI"};
+}
+
+// Record everything run() needs: operand buffers, their descriptors and the
+// fused activation. The dispatch type is taken from the lhs operand.
+void SubLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+                         const TensorDescriptor &rhsDescr, const ir::Activation activation,
+                         uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+  _activation = activation;
+  _inputType = lhsDescr.type;
+
+  _lhsData.u8 = lhsData;
+  _lhsDescr = lhsDescr;
+
+  _rhsData.u8 = rhsData;
+  _rhsDescr = rhsDescr;
+
+  _outputData.u8 = outputData;
+  _outputDescr = outputDescr;
+}
+
+// Dispatch to the kernel matching the configured input type.
+void SubLayer::run()
+{
+  if (_inputType == OperandType::FLOAT32)
+  {
+    subFloat32();
+  }
+  else if (_inputType == OperandType::QUANT8_ASYMM)
+  {
+    subQuant8();
+  }
+  else
+  {
+    // Previously an unsupported operand type fell through silently,
+    // leaving the output buffer untouched; fail loudly instead.
+    throw std::runtime_error{"Sub: unsupported input type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.h b/runtime/neurun/backend/cpu/kernel/SubLayer.h
new file mode 100644
index 000000000..c9abdb48c
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/SubLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Element-wise subtraction (lhs - rhs) with an optional fused activation.
+// FLOAT32 is implemented; QUANT8_ASYMM currently throws (cker kernel NYI).
+class SubLayer : public ::neurun::exec::IFunction
+{
+public:
+  SubLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
+  {
+    // DO NOTHING
+  }
+
+public:
+  // float32 path
+  void subFloat32();
+
+  // quantized (asymmetric uint8) path — not yet implemented
+  void subQuant8();
+
+  // Record operand buffers/descriptors and the fused activation to apply.
+  void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+                 const TensorDescriptor &rhsDescr, const ir::Activation activation,
+                 uint8_t *outputData, const TensorDescriptor &outputDescr);
+
+  void run() override;
+  void runSync() override
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  DataPtr _lhsData;
+  DataPtr _rhsData;
+  DataPtr _outputData;
+
+  TensorDescriptor _lhsDescr;
+  TensorDescriptor _rhsDescr;
+  TensorDescriptor _outputDescr;
+
+  ir::Activation _activation{ir::Activation::NONE};
+
+  OperandType _inputType{OperandType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__