summaryrefslogtreecommitdiff
path: root/libs/kernel/acl/src
diff options
context:
space:
mode:
Diffstat (limited to 'libs/kernel/acl/src')
-rw-r--r--libs/kernel/acl/src/CLUniqueTensor.h63
-rw-r--r--libs/kernel/acl/src/DepthwiseConv2D.h98
-rw-r--r--libs/kernel/acl/src/DepthwiseConv2D.test.h245
-rw-r--r--libs/kernel/acl/src/FullyConnected.h149
-rw-r--r--libs/kernel/acl/src/FullyConnected.test.h266
-rw-r--r--libs/kernel/acl/src/IO_accessor.cpp310
-rw-r--r--libs/kernel/acl/src/IO_accessor.h196
-rw-r--r--libs/kernel/acl/src/Init_acl.cpp32
-rw-r--r--libs/kernel/acl/src/NEUniqueTensor.h64
-rw-r--r--libs/kernel/acl/src/Reshape.h70
-rw-r--r--libs/kernel/acl/src/Reshape.test.h51
-rw-r--r--libs/kernel/acl/src/cl/Concatenation.cpp104
-rw-r--r--libs/kernel/acl/src/cl/Concatenation.test.cpp62
-rw-r--r--libs/kernel/acl/src/cl/Conv2D.cpp113
-rw-r--r--libs/kernel/acl/src/cl/Conv2D.test.cpp202
-rw-r--r--libs/kernel/acl/src/cl/DepthwiseConv2D.cpp60
-rw-r--r--libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp20
-rw-r--r--libs/kernel/acl/src/cl/FullyConnected.cpp53
-rw-r--r--libs/kernel/acl/src/cl/FullyConnected.test.cpp20
-rw-r--r--libs/kernel/acl/src/cl/Pooling.cpp130
-rw-r--r--libs/kernel/acl/src/cl/Pooling.test.cpp482
-rw-r--r--libs/kernel/acl/src/cl/Reshape.cpp43
-rw-r--r--libs/kernel/acl/src/cl/Reshape.test.cpp20
-rw-r--r--libs/kernel/acl/src/cl/Softmax.cpp78
-rw-r--r--libs/kernel/acl/src/cl/Softmax.test.cpp105
-rw-r--r--libs/kernel/acl/src/gtest_env.cpp37
-rw-r--r--libs/kernel/acl/src/neon/Concatenation.cpp105
-rw-r--r--libs/kernel/acl/src/neon/Concatenation.test.cpp62
-rw-r--r--libs/kernel/acl/src/neon/Conv2D.cpp111
-rw-r--r--libs/kernel/acl/src/neon/Conv2D.test.cpp202
-rw-r--r--libs/kernel/acl/src/neon/DepthwiseConv2D.cpp61
-rw-r--r--libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp20
-rw-r--r--libs/kernel/acl/src/neon/FullyConnected.cpp58
-rw-r--r--libs/kernel/acl/src/neon/FullyConnected.test.cpp21
-rw-r--r--libs/kernel/acl/src/neon/Pooling.cpp128
-rw-r--r--libs/kernel/acl/src/neon/Pooling.test.cpp436
-rw-r--r--libs/kernel/acl/src/neon/Reshape.cpp48
-rw-r--r--libs/kernel/acl/src/neon/Reshape.test.cpp20
-rw-r--r--libs/kernel/acl/src/neon/Softmax.cpp77
-rw-r--r--libs/kernel/acl/src/neon/Softmax.test.cpp105
-rw-r--r--libs/kernel/acl/src/shape.cpp89
-rw-r--r--libs/kernel/acl/src/shape.h93
-rw-r--r--libs/kernel/acl/src/support.cpp51
-rw-r--r--libs/kernel/acl/src/support.h93
-rw-r--r--libs/kernel/acl/src/util.cpp108
-rw-r--r--libs/kernel/acl/src/util.h193
46 files changed, 5154 insertions, 0 deletions
diff --git a/libs/kernel/acl/src/CLUniqueTensor.h b/libs/kernel/acl/src/CLUniqueTensor.h
new file mode 100644
index 000000000..6844e4565
--- /dev/null
+++ b/libs/kernel/acl/src/CLUniqueTensor.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__
+#define __NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__
+
+#include <arm_compute/runtime/CL/CLTensor.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+class CLUniqueTensor
+{
+public:
+ CLUniqueTensor(const ::arm_compute::TensorInfo &info)
+ {
+ _tensor.allocator()->init(info);
+ }
+
+public:
+ // Both copy and move are not allowed
+ CLUniqueTensor(const CLUniqueTensor &) = delete;
+ CLUniqueTensor(CLUniqueTensor &&) = delete;
+
+public:
+ ~CLUniqueTensor()
+ {
+ _tensor.allocator()->free();
+ }
+
+public:
+ void allocate()
+ {
+ _tensor.allocator()->allocate();
+ }
+
+public:
+ ::arm_compute::CLTensor &ref(void) { return _tensor; }
+ ::arm_compute::CLTensor *ptr(void) { return &_tensor; }
+
+private:
+ ::arm_compute::CLTensor _tensor;
+};
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif //__NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__
diff --git a/libs/kernel/acl/src/DepthwiseConv2D.h b/libs/kernel/acl/src/DepthwiseConv2D.h
new file mode 100644
index 000000000..8af8d4fd0
--- /dev/null
+++ b/libs/kernel/acl/src/DepthwiseConv2D.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__
+#define __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include "shape.h"
+#include "IO_accessor.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace common {
+
+typedef std::function<void (void)> sync_scheduler_f;
+
// Shared float32 depthwise-convolution implementation, parameterized over the
// ACL backend: TensorT is the tensor wrapper (CL/NE UniqueTensor), LayerT the
// backend's depthwise-convolution layer and ActT its activation layer.
//
// Copies the NHWC NNAPI buffers into freshly allocated ACL tensors, runs the
// configured function list (convolution plus optional fused activation),
// synchronizes via `sync_scheduler`, then copies the result back into
// `outputData`. Always returns true.
//
// NOTE(review): `depth_multiplier` is accepted but never forwarded to
// LayerT::configure() below — presumably only multiplier 1 is supported by
// the ACL version in use; TODO confirm.
template<class TensorT, class LayerT, class ActT>
bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
                      const float* filterData, const nnfw::rt::Shape& filterShape,
                      const float* biasData, const nnfw::rt::Shape& biasShape,
                      int32_t padding_left, int32_t padding_right,
                      int32_t padding_top, int32_t padding_bottom,
                      int32_t stride_width, int32_t stride_height,
                      int32_t depth_multiplier, int32_t activation,
                      float* outputData, const nnfw::rt::Shape& outputShape,
                      sync_scheduler_f sync_scheduler) {
  // Translate NNAPI shapes into ACL TensorShapes.
  auto inputShapeACL = util::fromNNShape(inputShape);
  auto weightsShapeACL = util::fromNNShape(filterShape);
  auto biasShapeACL = util::fromNNShape(biasShape);
  auto outputShapeACL = util::fromNNShape(outputShape);

  TensorT input(arm_compute::TensorInfo(inputShapeACL, arm_compute::Format::F32));
  TensorT weights(arm_compute::TensorInfo(weightsShapeACL, arm_compute::Format::F32));
  TensorT bias(arm_compute::TensorInfo(biasShapeACL, arm_compute::Format::F32));
  TensorT output(arm_compute::TensorInfo(outputShapeACL, arm_compute::Format::F32));

  arm_compute::PadStrideInfo psinfo = arm_compute::PadStrideInfo(stride_width, stride_height,
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      arm_compute::DimensionRoundingType::FLOOR);

  auto l = std::make_shared<LayerT>();
  l->configure(input.ptr(), weights.ptr(), bias.ptr(), output.ptr(), psinfo);

  // Functions are run in insertion order; the fused activation (if any) is
  // appended after the convolution.
  std::vector<std::shared_ptr<arm_compute::IFunction>> fns;

  fns.emplace_back(l);

  util::insertFusedActivationLayer<TensorT, ActT>(output, activation, fns);

  // Allocation must happen after all configure() calls above.
  input.allocate();
  output.allocate();
  bias.allocate();
  weights.allocate();

  // TODO: Do we need 2D tensor accessor for the input feature?
  TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
  TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape);
  TensorAccess<WeightAccessor>(weights.ref(), filterData, filterShape);

  for (const auto &fn : fns)
  {
    fn->run();
  }

  // Backend-specific barrier (e.g. CLScheduler::sync for OpenCL).
  sync_scheduler();

  TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);

  return true;
}
+
+} // namespace common
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__
diff --git a/libs/kernel/acl/src/DepthwiseConv2D.test.h b/libs/kernel/acl/src/DepthwiseConv2D.test.h
new file mode 100644
index 000000000..b2c8592ee
--- /dev/null
+++ b/libs/kernel/acl/src/DepthwiseConv2D.test.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/DepthwiseConv2D.h>
+
+// TODO: fix include path in CMakeFiles
+#include "util.h"
+
+#ifndef ACL_TEST
+#error "ACL_TEST should be defined first!"
+#endif // ACL_TEST
+
+#ifndef ACL_CORE_FUNC_NAME
+#error "ACL_CORE_FUNC_NAME should be defined first!"
+#endif // ACL_CORE_FUNC_NAME
+
+using namespace nnfw::kernel::acl;
+
+ACL_TEST(KernelACL_TC, dwise_conv2d_1) {
+ uint32_t input_n = 1;
+ uint32_t input_h = 3;
+ uint32_t input_w = 3;
+ uint32_t input_c = 1;
+ uint32_t filter_h = 3;
+ uint32_t filter_w = 3;
+ uint32_t filter_c = 1;
+ uint32_t out_h = 1;
+ uint32_t out_w = 1;
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t depth_multiplier = 1;
+
+ util::TensorWrapper input({input_n, input_h, input_w, input_c});
+ util::TensorWrapper weights({1, filter_h, filter_w, filter_c});
+ util::TensorWrapper bias({filter_c});
+ util::TensorWrapper output({1, out_h, out_w, filter_c});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = input_n;
+ uint32_t H = input_h;
+ uint32_t W = input_w;
+ uint32_t C = input_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+ weights.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = 1;
+ uint32_t H = filter_h;
+ uint32_t W = filter_w;
+ uint32_t C = filter_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, out_h, out_w, filter_c});
+ expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 204.f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, dwise_conv2d_multi_channel) {
+ uint32_t input_n = 1;
+ uint32_t input_h = 3;
+ uint32_t input_w = 3;
+ uint32_t input_c = 3;
+ uint32_t filter_h = 3;
+ uint32_t filter_w = 3;
+ uint32_t filter_c = input_c;
+ uint32_t out_h = 1;
+ uint32_t out_w = 1;
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t depth_multiplier = 1;
+
+ util::TensorWrapper input({input_n, input_h, input_w, input_c});
+ util::TensorWrapper weights({1, filter_h, filter_w, filter_c});
+ util::TensorWrapper bias({filter_c});
+ util::TensorWrapper output({1, out_h, out_w, filter_c});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = input_n;
+ uint32_t H = input_h;
+ uint32_t W = input_w;
+ uint32_t C = input_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+ weights.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = 1;
+ uint32_t H = filter_h;
+ uint32_t W = filter_w;
+ uint32_t C = filter_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, out_h, out_w, filter_c});
+ expected.initValue({
+ 1836.f,
+ 2061.f,
+ 2304.f
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
// Inception-sized depthwise convolution (112x112x32, SAME padding, RELU6).
// Input and filter are constant per channel (value = channel index), so the
// pre-activation result at each pixel is c*c times the number of filter taps
// that land inside the image; RELU6 then clamps everything to 6.
ACL_TEST(KernelACL_TC, dwise_conv2d_inception_1) {
  uint32_t input_n = 1;
  uint32_t input_h = 112;
  uint32_t input_w = 112;
  uint32_t input_c = 32;
  uint32_t filter_h = 3;
  uint32_t filter_w = 3;
  uint32_t filter_c = input_c;
  uint32_t out_h = 112;
  uint32_t out_w = 112;

  int32_t padding_left = 1;
  int32_t padding_right = 1;
  int32_t padding_top = 1;
  int32_t padding_bottom = 1;
  int32_t stride_width = 1;
  int32_t stride_height = 1;
  int32_t depth_multiplier = 1;

  util::TensorWrapper input({input_n, input_h, input_w, input_c});
  util::TensorWrapper weights({1, filter_h, filter_w, filter_c});
  util::TensorWrapper bias({filter_c});
  util::TensorWrapper output({1, out_h, out_w, filter_c});

  int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU6);

  // Every element of channel c (in both input and filter) holds the value c.
  input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
    return c;
  });
  weights.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
    return c;
  });
  bias.initValue([](uint32_t w) {
    return 0.f;
  });
  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
    return 0.f;
  });

  bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
      weights.ptr<float>(), weights.shape(),
      bias.ptr<float>(), bias.shape(),
      padding_left, padding_right,
      padding_top, padding_bottom,
      stride_width, stride_height,
      depth_multiplier, activation,
      output.ptr<float>(), output.shape());

  EXPECT_EQ(bret, true);

  util::TensorWrapper expected({1, out_h, out_w, filter_c});
  expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
    // Count the 3x3 taps inside the image: 9 interior, 6 on an edge,
    // 4 in a corner (9 - 3 - 3 + 1).
    float v = 9.f;
    if( h == 0 || h == out_h-1 )
      v -= 3.f;
    if( w == 0 || w == out_w-1 )
      v -= 3.f;

    // four corners
    if( (w == 0 && h == 0)
      || (w == 0 && h == out_h-1)
      || (w == out_w-1 && h == 0)
      || (w == out_w-1 && h == out_h-1) )
      v += 1.f;

    // Assumption: negative numbers cannot appear because
    // only positive numbers exist in the input and weights.
    float ret = c*c*v;
    return std::min(ret, 6.f);
  });

  EXPECT_EQ(output, expected);
}
diff --git a/libs/kernel/acl/src/FullyConnected.h b/libs/kernel/acl/src/FullyConnected.h
new file mode 100644
index 000000000..5030a8548
--- /dev/null
+++ b/libs/kernel/acl/src/FullyConnected.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__
+#define __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include "shape.h"
+#include "IO_accessor.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace common {
+
+typedef std::function<void (void)> sync_scheduler_f;
+
+template<class TensorT, class LayerT, class ActT>
+bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape,
+ sync_scheduler_f sync_scheduler) {
+
+ // NNAPI specification: https://developer.android.com/ndk/reference/group___neural_networks.html#ggaabbe492c60331b13038e39d4207940e0aaada7a3dbaf4676aba560c933ff610c5
+
+ // According to the NNAPI Specification,
+ // INPUT
+ // 1. input rank is up to 4.
+ // 2. if input rank > 2, it is flattened to rank 2 [batch_size, input_size]
+ nnfw::rt::Shape flattenedInputShape = inputShape;
+ switch(inputShape.dimensions.size()) {
+ case 1:
+ {
+ assert("Need to be implemented." && 0);
+ break;
+ }
+ case 2:
+ {
+ // DO NOTHING.
+ break;
+ }
+ case 3:
+ {
+ assert("Need to be implemented." && 0);
+ break;
+ }
+ case 4:
+ {
+ auto N = inputShape.dimensions[0];
+ auto H = inputShape.dimensions[1];
+ auto W = inputShape.dimensions[2];
+ auto C = inputShape.dimensions[3];
+ flattenedInputShape.dimensions = {N, H*W*C};
+ break;
+ }
+ default:
+ assert(inputShape.dimensions.size() <= 4);
+ }
+ // Finally, flattenedInputShape is a 2D tensor.
+
+ // WEIGHTS is a 2D tensor
+ assert(weightsShape.dimensions.size() == 2);
+
+ // BIAS is a 1D tensor
+ assert(biasShape.dimensions.size() == 1);
+
+ // OUTPUT is a 2D tensor.
+ assert(outputShape.dimensions.size() == 2);
+
+ auto input_shape = util::fromNNShape(flattenedInputShape);
+ auto weights_shape = util::fromNNShape(weightsShape);
+ auto bias_shape = util::fromNNShape(biasShape);
+ auto output_shape = util::fromNNShape(outputShape);
+
+ assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ TensorT input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ TensorT output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+ TensorT bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+ TensorT weights(arm_compute::TensorInfo(weights_shape, arm_compute::Format::F32));
+
+ auto fc = std::make_shared<LayerT>();
+ fc->configure(input.ptr(), weights.ptr(), bias.ptr(), output.ptr());
+
+ fns.emplace_back(fc);
+
+ if (ANEURALNETWORKS_FUSED_RELU == activation)
+ {
+ auto relu_f = std::make_shared<ActT>();
+
+ const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+ // Do in-place update
+ relu_f->configure(output.ptr(), nullptr, relu_info);
+
+ fns.emplace_back(relu_f);
+ }
+
+ input.allocate();
+ output.allocate();
+ bias.allocate();
+ weights.allocate();
+
+ // TODO: Do we need 2D tensor accessor for the input feature?
+ TensorAccess<MatrixWeightAccessor>(input.ref(), inputData, inputShape);
+ TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape);
+ TensorAccess<MatrixWeightAccessor>(weights.ref(), weightsData, weightsShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ sync_scheduler();
+
+ TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace common
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__
diff --git a/libs/kernel/acl/src/FullyConnected.test.h b/libs/kernel/acl/src/FullyConnected.test.h
new file mode 100644
index 000000000..01bbff802
--- /dev/null
+++ b/libs/kernel/acl/src/FullyConnected.test.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/FullyConnected.h>
+
+// TODO: fix include path in CMakeFiles
+#include "util.h"
+
+#ifndef ACL_TEST
+#error "ACL_TEST should be defined first!"
+#endif // ACL_TEST
+
+#ifndef ACL_CORE_FUNC_NAME
+#error "ACL_CORE_FUNC_NAME should be defined first!"
+#endif // ACL_CORE_FUNC_NAME
+
+using namespace nnfw::kernel::acl;
+using fullyConnectedFloat32T = bool (*)(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+ACL_TEST(KernelACL_TC, fcFloat32_1) {
+
+ util::TensorWrapper input({1,1,1,100});
+ util::TensorWrapper weights({1,100});
+ util::TensorWrapper bias({1});
+ util::TensorWrapper output({1,1});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.f;
+ });
+ weights.initValue([](uint32_t h, uint32_t w) {
+ return 1.f;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,1});
+ expected.initValue([](uint32_t h, uint32_t w) {
+ return 100.f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, fcFloat32_relu) {
+
+ util::TensorWrapper input({1,1,1,100});
+ util::TensorWrapper weights({1,100});
+ util::TensorWrapper bias({1});
+ util::TensorWrapper output({1,1});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.f;
+ });
+ weights.initValue([](uint32_t h, uint32_t w) {
+ return -1.f;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,1});
+ expected.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, fcFloat32_conv_fc) {
+ uint32_t input_n = 1;
+ uint32_t input_c = 5;
+ uint32_t input_h = 4;
+ uint32_t input_w = 4;
+ uint32_t weight_n = 6;
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ util::TensorWrapper input({input_n, input_h, input_w, input_c});
+ util::TensorWrapper weight({weight_n, input_c*input_h*input_w});
+ util::TensorWrapper bias({weight_n});
+ util::TensorWrapper output({1, weight_n});
+
+ input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = input_n;
+ uint32_t H = input_h;
+ uint32_t W = input_w;
+ uint32_t C = input_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+
+ weight.initValue([&](uint32_t h, uint32_t w) {
+ uint32_t H = weight_n;
+ uint32_t W = input_c*input_h*input_w;
+
+ return h*W + w;
+ });
+
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weight.ptr<float>(), weight.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, weight_n});
+ expected.initValue({
+ 167480.f,
+ 420280.f,
+ 673080.f,
+ 925880.f,
+ 1178680.f,
+ 1431480.f});
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, fcFloat32_fc_fc) {
+ uint32_t input_n = 6;
+ uint32_t weight_n = 6;
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ util::TensorWrapper input({1, input_n});
+ util::TensorWrapper weight({weight_n, input_n});
+ util::TensorWrapper bias({weight_n});
+ util::TensorWrapper output({1, weight_n});
+
+ input.initValue([&](uint32_t h, uint32_t w) {
+ // not use h because h = 0.
+ return (float)w;
+ });
+
+ weight.initValue([&](uint32_t h, uint32_t w) {
+ uint32_t H = weight_n;
+ uint32_t W = input_n;
+
+ return (float)(h*W + w);
+ });
+
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weight.ptr<float>(), weight.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, weight_n});
+ expected.initValue({
+ 55.f,
+ 145.f,
+ 235.f,
+ 325.f,
+ 415.f,
+ 505.f,
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, fcFloat32_inceptionv3) {
+
+ uint32_t input_c = 2048;
+ uint32_t weight_n = 1008;
+
+ util::TensorWrapper input({1,1,1,input_c});
+ util::TensorWrapper weight({weight_n,input_c});
+ util::TensorWrapper bias({weight_n});
+ util::TensorWrapper output({1, weight_n});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.f;
+ });
+ weight.initValue([&](uint32_t h, uint32_t w) {
+ return (float)h;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weight.ptr<float>(), weight.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, weight_n});
+ expected.initValue([&](uint32_t h, uint32_t w) {
+ return w*input_c;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
diff --git a/libs/kernel/acl/src/IO_accessor.cpp b/libs/kernel/acl/src/IO_accessor.cpp
new file mode 100644
index 000000000..410fb8ea5
--- /dev/null
+++ b/libs/kernel/acl/src/IO_accessor.cpp
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IO_accessor.h"
+
+#include <cassert>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
// The accessor constructors below only record the NNAPI-side buffer pointer
// and its shape; the actual element-by-element copy happens later in each
// accessor's access_tensor().
// NOTE(review): every accessor stores the Shape by const reference, so the
// caller's Shape object must outlive the accessor. That holds for the
// synchronous TensorAccess<> helper; confirm before using these elsewhere.

InputAccessor::InputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape)
  : _inputData(inputData)
  , _inputShape(inputShape)
{
}

MatrixInputAccessor::MatrixInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape)
  : _inputData(inputData)
  , _inputShape(inputShape)
{
}

VectorInputAccessor::VectorInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape)
  : _inputData(inputData)
  , _inputShape(inputShape)
{
}

WeightAccessor::WeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape)
  : _filterData(filterData)
  , _filterShape(filterShape)
{
}

MatrixWeightAccessor::MatrixWeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape)
  : _filterData(filterData)
  , _filterShape(filterShape)
{
}

BiasAccessor::BiasAccessor(const float* biasData, const nnfw::rt::Shape& biasShape)
  : _biasData(biasData)
  , _biasShape(biasShape)
{
}

OutputAccessor::OutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape)
  : _outputData(outputData)
  , _outputShape(outputShape)
{
}

MatrixOutputAccessor::MatrixOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape)
  : _outputData(outputData)
  , _outputShape(outputShape)
{
}

VectorOutputAccessor::VectorOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape)
  : _outputData(outputData)
  , _outputShape(outputShape)
{
}
+
// Maps an ACL coordinate to a linear offset into the NNAPI(NHWC) buffer.
// In this file it is only used by BiasAccessor, whose tensors are 1-D, so in
// practice only the id[0] term contributes.
// NOTE(review): for a 1-D shape, getSizeOfDimension(shape, 1..3) asks for
// dimensions the shape does not have; here the results only feed stride
// accumulation that a 1-D coordinate never uses, but this looks out-of-range
// for the general case — TODO confirm getSizeOfDimension's behaviour for
// missing dimensions before using this with higher-rank tensors.
static uint32_t getOffsetNCHW(const nnfw::rt::Shape& shape, const arm_compute::Coordinates& id)
{
  // get offset for ACL(NCHW) from data of NNAPI(NHWC)
  uint32_t num = getSizeOfDimension(shape, 0);
  uint32_t height = getSizeOfDimension(shape, 1);
  uint32_t width = getSizeOfDimension(shape, 2);
  uint32_t chann = getSizeOfDimension(shape, 3);
  uint32_t stride = 1;
  uint32_t offset = 0;
  uint32_t numdim = id.num_dimensions();
  // Accumulate one term per coordinate dimension that actually exists.
  offset += numdim > 0 ? id[0] * stride : 0; stride *= width;
  offset += numdim > 1 ? id[1] * stride : 0; stride *= height;
  offset += numdim > 2 ? id[2] * stride : 0; stride *= chann;
  offset += numdim > 3 ? id[3] * stride : 0; stride *= num;
  return offset;
}
+
+static uint32_t getElementOffset(const nnfw::rt::Shape& shape,
+ uint32_t ch, uint32_t row, uint32_t col)
+{
+ assert(getSizeOfDimension(shape, 0) == 1);
+ assert(shape.dimensions.size() == 4);
+
+ // TODO Optimize this!
+ const uint32_t W = getSizeOfDimension(shape, 2);
+ const uint32_t C = getSizeOfDimension(shape, 3);
+
+ int offset = 0;
+
+ // NNAPI uses NHWC ordering
+ offset += row * W * C;
+ offset += col * C;
+ offset += ch;
+
+ return offset;
+}
+
+static uint32_t getElementOffset(const nnfw::rt::Shape& shape,
+ uint32_t nth, uint32_t ch, uint32_t row, uint32_t col)
+{
+ assert(shape.dimensions.size() == 4);
+
+ // TODO Optimize this!
+ const uint32_t H = getSizeOfDimension(shape, 1);
+ const uint32_t W = getSizeOfDimension(shape, 2);
+ const uint32_t C = getSizeOfDimension(shape, 3);
+
+ int offset = 0;
+
+ // NNAPI uses NHWC ordering
+ offset += nth * H * W * C;
+ offset += row * W * C;
+ offset += col * C;
+ offset += ch;
+
+ return offset;
+}
+
+bool InputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const uint32_t ch = id[2];
+ const uint32_t row = id[1];
+ const uint32_t col = id[0];
+
+ uint32_t offset = getElementOffset(_inputShape, ch, row, col);
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_inputData + offset);
+ });
+ return true;
+}
+
+bool MatrixInputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() <= 2);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const auto row = id[1];
+ const auto col = id[0];
+ const auto W = tensor.info()->tensor_shape().x();
+
+ const auto offset = row * W + col;
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_inputData + offset);
+ });
+ return true;
+}
+
+bool VectorInputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() == 1);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ uint32_t offset = id[0];
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_inputData + offset);
+ });
+ return true;
+}
+
+bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const uint32_t nth = id[3];
+ const uint32_t ch = id[2];
+ const uint32_t row = id[1];
+ const uint32_t col = id[0];
+
+ uint32_t offset = getElementOffset(_filterShape, nth, ch, row, col);
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_filterData + offset);
+ });
+ return true;
+}
+
+bool MatrixWeightAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() <= 2);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const auto row = id[1];
+ const auto col = id[0];
+ const auto W = tensor.info()->tensor_shape().x();
+
+ uint32_t offset = row * W + col;
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_filterData + offset);
+ });
+ return true;
+}
+
// Fills the ACL bias tensor from the NNAPI bias buffer. Offsets come from
// getOffsetNCHW(), which degenerates to id[0] for the 1-D shapes used here.
bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor)
{
  arm_compute::Window window;
  window.use_tensor_dimensions(tensor.info()->tensor_shape());

  execute_window_loop(window, [&](const arm_compute::Coordinates& id)
  {
    uint32_t offset = getOffsetNCHW(_biasShape, id);
    *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
        *(_biasData + offset);
  });
  return true;
}
+
+bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const uint32_t ch = id[2];
+ const uint32_t row = id[1];
+ const uint32_t col = id[0];
+
+ uint32_t offset = getElementOffset(_outputShape, ch, row, col);
+
+ *(_outputData + offset) =
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id));
+ });
+ return false; // end the network
+}
+
+bool VectorOutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() == 1);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const uint32_t x = id[0];
+
+ uint32_t offset = x;
+
+ *(_outputData + offset) =
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id));
+ });
+ return false; // end the network
+}
+
+bool MatrixOutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() <= 2);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const auto row = id[1];
+ const auto col = id[0];
+ const auto W = tensor.info()->tensor_shape().x();
+
+ const auto offset = row * W + col;
+
+ *(_outputData + offset) =
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id));
+ });
+ return false; // end the network
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/IO_accessor.h b/libs/kernel/acl/src/IO_accessor.h
new file mode 100644
index 000000000..e7670f15c
--- /dev/null
+++ b/libs/kernel/acl/src/IO_accessor.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
+#define __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
+
+#include <arm_compute/graph/ITensorAccessor.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/NEON/NEFunctions.h>
+
+#include <OperationsUtils.h> // for nnfw::rt::Shape
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+class InputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ InputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
+ InputAccessor(InputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _inputData;
+ const nnfw::rt::Shape& _inputShape;
+};
+
+class MatrixInputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ MatrixInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
+ MatrixInputAccessor(MatrixInputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _inputData;
+ const nnfw::rt::Shape& _inputShape;
+};
+
+class VectorInputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ VectorInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
+ VectorInputAccessor(VectorInputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _inputData;
+ const nnfw::rt::Shape& _inputShape;
+};
+
+class WeightAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ WeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape);
+ WeightAccessor(WeightAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _filterData;
+ const nnfw::rt::Shape& _filterShape;
+};
+
+class MatrixWeightAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ MatrixWeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape);
+ MatrixWeightAccessor(MatrixWeightAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _filterData;
+ const nnfw::rt::Shape& _filterShape;
+};
+
+class BiasAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ BiasAccessor(const float* biasData, const nnfw::rt::Shape& biasShape);
+ BiasAccessor(BiasAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _biasData;
+ const nnfw::rt::Shape& _biasShape;
+};
+
+class OutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ OutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
+ OutputAccessor(OutputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ float* _outputData;
+ const nnfw::rt::Shape& _outputShape;
+};
+
+class MatrixOutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ MatrixOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
+ MatrixOutputAccessor(MatrixOutputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ float* _outputData;
+ const nnfw::rt::Shape& _outputShape;
+};
+
+class VectorOutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ VectorOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
+ VectorOutputAccessor(VectorOutputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ float* _outputData;
+ const nnfw::rt::Shape& _outputShape;
+};
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::CLTensor& tensor, const float* data,
+ const nnfw::rt::Shape& shape)
+{
+ tensor.map();
+ AccessorType accessor(data, shape);
+ accessor.access_tensor(tensor);
+ tensor.unmap();
+}
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::CLTensor& tensor, float* data,
+ const nnfw::rt::Shape& shape)
+{
+ tensor.map();
+ AccessorType accessor(data, shape);
+ accessor.access_tensor(tensor);
+ tensor.unmap();
+}
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::Tensor& tensor, const float* data,
+ const nnfw::rt::Shape& shape)
+{
+ AccessorType accessor(data, shape);
+ accessor.access_tensor(tensor);
+}
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::Tensor& tensor, float* data,
+ const nnfw::rt::Shape& shape)
+{
+ AccessorType accessor(data, shape);
+ accessor.access_tensor(tensor);
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
diff --git a/libs/kernel/acl/src/Init_acl.cpp b/libs/kernel/acl/src/Init_acl.cpp
new file mode 100644
index 000000000..cabf079fa
--- /dev/null
+++ b/libs/kernel/acl/src/Init_acl.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+// Performs one-time initialization; safe to call multiple times.
+void Initialize(void)
+{
+ arm_compute::CLScheduler::get().default_init();
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/NEUniqueTensor.h b/libs/kernel/acl/src/NEUniqueTensor.h
new file mode 100644
index 000000000..34412f9e3
--- /dev/null
+++ b/libs/kernel/acl/src/NEUniqueTensor.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__
+#define __NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__
+
+#include <arm_compute/runtime/Tensor.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+// TODO: find a way to merge CLUniqueTensor and NEUniqueTensor.
+class NEUniqueTensor
+{
+public:
+ NEUniqueTensor(const ::arm_compute::TensorInfo &info)
+ {
+ _tensor.allocator()->init(info);
+ }
+
+public:
+ // Both copy and move are not allowed
+ NEUniqueTensor(const NEUniqueTensor &) = delete;
+ NEUniqueTensor(NEUniqueTensor &&) = delete;
+
+public:
+ ~NEUniqueTensor()
+ {
+ _tensor.allocator()->free();
+ }
+
+public:
+ void allocate()
+ {
+ _tensor.allocator()->allocate();
+ }
+
+public:
+ ::arm_compute::Tensor &ref(void) { return _tensor; }
+ ::arm_compute::Tensor *ptr(void) { return &_tensor; }
+
+private:
+ ::arm_compute::Tensor _tensor;
+};
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif //__NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__
diff --git a/libs/kernel/acl/src/Reshape.h b/libs/kernel/acl/src/Reshape.h
new file mode 100644
index 000000000..ebd82477d
--- /dev/null
+++ b/libs/kernel/acl/src/Reshape.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__
+#define __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+// TODO: fix include path in CMakeFiles
+#include "IO_accessor.h"
+#include "shape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace common {
+
+typedef std::function<void (void)> sync_scheduler_f;
+
+template<class TensorT, class LayerT>
+bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape,
+ void* outputData, const nnfw::rt::Shape& outputShape,
+ sync_scheduler_f sync_scheduler) {
+
+ auto input_shape = util::fromNNShape(inputShape);
+ auto output_shape = util::fromNNShape(outputShape);
+
+ TensorT input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ TensorT output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ LayerT l;
+
+ l.configure(input.ptr(), output.ptr());
+
+ input.allocate();
+ output.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), (float*)inputData, inputShape);
+
+ l.run();
+
+ sync_scheduler();
+
+ TensorAccess<OutputAccessor>(output.ref(), (float*)outputData, outputShape);
+
+ return true;
+}
+
+} // namespace common
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__
diff --git a/libs/kernel/acl/src/Reshape.test.h b/libs/kernel/acl/src/Reshape.test.h
new file mode 100644
index 000000000..a96a896a6
--- /dev/null
+++ b/libs/kernel/acl/src/Reshape.test.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Reshape.h>
+
+// TODO: fix include path in CMakeFiles
+#include "util.h"
+
+#ifndef ACL_TEST
+#error "ACL_TEST should be defined first!"
+#endif // ACL_TEST
+
+#ifndef ACL_CORE_FUNC_NAME
+#error "ACL_CORE_FUNC_NAME should be defined first!"
+#endif // ACL_CORE_FUNC_NAME
+
+using namespace nnfw::kernel::acl;
+
+ACL_TEST(KernelACL_TC, reshape_1) {
+ const nnfw::rt::Shape inputShape = {OperandType::FLOAT32, {1,1,9,1}, 1.0, 0};
+ float inputData[9] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
+
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float outputData[9] = {0};
+
+ bool bret = ACL_CORE_FUNC_NAME(inputData, inputShape,
+ outputData, outputShape);
+
+ EXPECT_EQ(bret, true);
+
+ float expectData[9] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+
+}
diff --git a/libs/kernel/acl/src/cl/Concatenation.cpp b/libs/kernel/acl/src/cl/Concatenation.cpp
new file mode 100644
index 000000000..9376006ca
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Concatenation.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs,
+ const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ if (axis != 3)
+ {
+ assert("Only support axis=3 for ACL" && 0);
+ return false;
+ }
+ assert(inputDataPtrs.size() == inputShapes.size());
+
+ std::vector<arm_compute::CLTensor*> inputPtrs;
+ std::vector<arm_compute::ICLTensor*> inputIptrs;
+ arm_compute::CLTensor output;
+
+ // init Tensors
+ std::vector<nnfw::rt::Shape>::const_iterator it_inputShape = inputShapes.begin();
+ for (auto inputData : inputDataPtrs)
+ {
+ const nnfw::rt::Shape& inputShape = *it_inputShape;
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::CLTensor* inputPtr = new arm_compute::CLTensor();
+
+ inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ inputPtrs.push_back(inputPtr);
+ inputIptrs.push_back(inputPtr);
+
+ it_inputShape++;
+ }
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+ output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ // prepare ACL Concatenate and configure tensors
+ auto concat = std::make_shared<arm_compute::CLDepthConcatenateLayer>();
+ concat->configure(inputIptrs, &output);
+
+ // allocate Tensors
+ it_inputShape = inputShapes.begin();
+ std::vector<const float*>::const_iterator it_inputData = inputDataPtrs.begin();
+ for (auto inputPtr : inputPtrs)
+ {
+ inputPtr->allocator()->allocate();
+
+ const float* inputData = *it_inputData;
+ const nnfw::rt::Shape& inputShape = *it_inputShape;
+
+ TensorAccess<InputAccessor>(*inputPtr, inputData, inputShape);
+
+ it_inputShape++;
+ it_inputData++;
+ }
+ output.allocator()->allocate();
+
+ // run
+ concat->run();
+ arm_compute::CLScheduler::get().sync();
+
+ // get output
+ TensorAccess<OutputAccessor>(output, outputData, outputShape);
+
+ // cleanup
+ for (auto inputPtr : inputPtrs)
+ {
+ inputPtr->allocator()->free();
+ delete inputPtr;
+ }
+ output.allocator()->free();
+
+ return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Concatenation.test.cpp b/libs/kernel/acl/src/cl/Concatenation.test.cpp
new file mode 100644
index 000000000..b2c5a5891
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Concatenation.test.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Concatenation.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, concatFloat32_1)
+{
+ float inputData_1[6] = {
+ 1, 2, 3, 4, 5, 6 // [ [ [1],[2],[3] ], [ [4],[5],[6] ] ]
+ };
+ float inputData_2[6] = {
+ 7, 8, 9, 10, 11, 12 // [ [ [7],[8],[9] ], [ [10],[11],[12] ] ]
+ };
+ const nnfw::rt::Shape inputShape_1 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 };
+ const nnfw::rt::Shape inputShape_2 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 };
+ std::vector<const float*> inputDataPtrs;
+ std::vector<nnfw::rt::Shape> inputShapes;
+ float outputData[12];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,3,2}, 1.0, 0 };
+ bool bret;
+
+ inputDataPtrs.push_back(inputData_1);
+ inputDataPtrs.push_back(inputData_2);
+ inputShapes.push_back(inputShape_1);
+ inputShapes.push_back(inputShape_2);
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = concatenationFloat32(inputDataPtrs, inputShapes, 3,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectNCHW[] = {
+ 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12
+ };
+ float expectData[12]; // [ [ [1,7],[2,8],[3,9] ], [ [4,10],[5,11],[6,12] ] ]
+ util::NCHW2NHWC(expectNCHW, expectData, outputShape);
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/cl/Conv2D.cpp b/libs/kernel/acl/src/cl/Conv2D.cpp
new file mode 100644
index 000000000..4783bdc1d
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Conv2D.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <util/environment.h>
+
+#include "../IO_accessor.h"
+#include "../util.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../support.h"
+
+#include "util/feature/TextFormatter.h"
+
+#include "support/nnapi/feature/Reader.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+static int verbose = 0;
+
+bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape);
+ arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+ arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+ CLUniqueTensor bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+ CLUniqueTensor filter(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ auto conv_f = std::make_shared<arm_compute::CLConvolutionLayer>();
+
+ conv_f->configure(input.ptr(), filter.ptr(), bias.ptr(), output.ptr(), conv_info);
+
+ fns.emplace_back(conv_f);
+
+ util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+ bias.allocate();
+ filter.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+ TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape);
+ TensorAccess<WeightAccessor>(filter.ref(), filterData, filterShape);
+
+ nnfw::util::env::IntAccessor("CONV2D_VERBOSE").access(verbose);
+ if (verbose)
+ {
+ input.ref().map();
+ auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape);
+ nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData};
+ nnfw::support::acl::feature::Reader<float> acl_ifm_reader{input.ptr()};
+
+ std::cout << "NNAPI IFM:" << std::endl;
+ std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl;
+
+ std::cout << "ARM Compute IFM:" << std::endl;
+ std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl;
+ input.ref().unmap();
+ }
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Conv2D.test.cpp b/libs/kernel/acl/src/cl/Conv2D.test.cpp
new file mode 100644
index 000000000..e34cdeea5
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Conv2D.test.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Conv2D.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, convFloat32_3x3to1x1)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 10.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, convFloat32_3x3to3x3)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[9];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {
+ 5.0f, 7.0f, 5.0f,
+ 7.0f, 10.0f, 7.0f,
+ 5.0f, 7.0f, 5.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, convFloat32_3x3to3x3_RELU)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { -5.0f };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[9];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] =
+ {
+ 0.0f, 1.0f, 0.0f,
+ 1.0f, 4.0f, 1.0f,
+ 0.0f, 1.0f, 0.0f
+ };
+
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, convFloat32_3x5to3x3)
+{
+ float inputData[15] = {
+ 1,2,3,4,5,
+ 6,7,8,9,10,
+ 11,12,13,14,15
+ };
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,5,1}, 1.0, 0 };
+ float filterData[18] = {
+ 1,1,1, 1,1,1, 1,1,1,
+ 2,2,2, 2,2,2, 2,2,2
+ };
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {2,3,3,1}, 1.0, 0 };
+ float biasData[2] = { 1.0, 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {2}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[30];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,5,2}, 1.0, 0 };
+ bool bret;
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectNCHW[] = {
+ 17.0f, 28.0f, 34.0f, 40.0f, 29.0f,
+ 40.0f, 64.0f, 73.0f, 82.0f, 58.0f,
+ 37.0f, 58.0f, 64.0f, 70.0f, 49.0f,
+
+ 33.0f, 55.0f, 67.0f, 79.0f, 57.0f,
+ 79.0f, 127.0f, 145.0f, 163.0f, 115.0f,
+ 73.0f, 115.0f, 127.0f, 139.0f, 97.0f
+ };
+ float expectData[30];
+ util::NCHW2NHWC(expectNCHW, expectData, outputShape);
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp b/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..7593a99f4
--- /dev/null
+++ b/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../DepthwiseConv2D.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+static void sync_scheduler() {
+ arm_compute::CLScheduler::get().sync();
+}
+
+bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape) {
+ return common::depthwiseConvFloat32<CLUniqueTensor, arm_compute::CLDepthwiseConvolutionLayer,
+ arm_compute::CLActivationLayer>(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ outputData, outputShape,
+ sync_scheduler);
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
diff --git a/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp b/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..695563383
--- /dev/null
+++ b/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME depthwiseConvFloat32
+#define ACL_TEST(tc, t) TEST(tc, cl_##t)
+
+#include "../DepthwiseConv2D.test.h"
diff --git a/libs/kernel/acl/src/cl/FullyConnected.cpp b/libs/kernel/acl/src/cl/FullyConnected.cpp
new file mode 100644
index 000000000..7513355ab
--- /dev/null
+++ b/libs/kernel/acl/src/cl/FullyConnected.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../FullyConnected.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+static void sync_scheduler() {
+  arm_compute::CLScheduler::get().sync();
+}
+
+bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape) {
+ return common::fullyConnectedFloat32<CLUniqueTensor, arm_compute::CLFullyConnectedLayer,
+ arm_compute::CLActivationLayer>(inputData, inputShape,
+ weightsData, weightsShape,
+ biasData, biasShape,
+ activation,
+ outputData, outputShape,
+ sync_scheduler);
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/FullyConnected.test.cpp b/libs/kernel/acl/src/cl/FullyConnected.test.cpp
new file mode 100644
index 000000000..b1f5a095f
--- /dev/null
+++ b/libs/kernel/acl/src/cl/FullyConnected.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME fullyConnectedFloat32
+#define ACL_TEST(tc, t) TEST(tc, cl_##t)
+
+#include "../FullyConnected.test.h"
diff --git a/libs/kernel/acl/src/cl/Pooling.cpp b/libs/kernel/acl/src/cl/Pooling.cpp
new file mode 100644
index 000000000..e22eacccc
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Pooling.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+
+#include <cassert>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ arm_compute::PoolingLayerInfo maxpool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::MAX,
+ arm_compute::Size2D(filter_width,filter_height),
+ pad_info, false);
+
+ CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto pool_f = std::make_shared<arm_compute::CLPoolingLayer>();
+ pool_f->configure(input.ptr(), output.ptr(), maxpool_info);
+
+ fns.emplace_back(pool_f);
+
+ input.allocate();
+ output.allocate();
+
+ util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns);
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ arm_compute::PoolingLayerInfo pool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::AVG,
+ arm_compute::Size2D(filter_width,filter_height),
+ pad_info, true);
+
+ CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto pool_f = std::make_shared<arm_compute::CLPoolingLayer>();
+ pool_f->configure(input.ptr(), output.ptr(), pool_info);
+
+ fns.emplace_back(pool_f);
+
+ input.allocate();
+ output.allocate();
+
+ util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns);
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Pooling.test.cpp b/libs/kernel/acl/src/cl/Pooling.test.cpp
new file mode 100644
index 000000000..8112e7a45
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Pooling.test.cpp
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/Pooling.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, maxPoolFloat32_3x3to1x1)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 9.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, maxPoolFloat32_3x3to1x1_RELU)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = -1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value--;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_RELU;
+
+ bret = maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 0.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, maxPoolFloat32_3x3to2x2)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 1;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 2;
+ int32_t filter_height = 2;
+
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {
+ 5.0f, 6.0f,
+ 8.0f, 9.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, maxPoolFloat32_147x147to73x73)
+{
+ util::TensorWrapper input({1,147,147,64});
+ util::TensorWrapper output({1,73,73,64});
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = maxPoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,73,73,64});
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, maxPoolFloat32_71x71to35x35)
+{
+ util::TensorWrapper input({1,71,71,192});
+ util::TensorWrapper output({1,35,35,192});
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = maxPoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,35,35,192});
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_3x3to1x1)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = averagePoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 5.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_3x3to1x1_RELU)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 3.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value--;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_RELU;
+
+ bret = averagePoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 0.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_3x3to2x2)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 2;
+ int32_t filter_height = 2;
+
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = averagePoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {
+ 3.0f, 4.0f,
+ 6.0f, 7.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_3x3to3x3)
+{
+ std::vector<uint32_t> dims = {1,3,3,1};
+ util::TensorWrapper input(dims);
+ util::TensorWrapper output(dims);
+
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+  float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected(dims);
+ float v=2.5f;
+ expected.initValue([&v](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ v = v + 0.5f;
+ return v;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_35x35to35x35)
+{
+  int32_t N=35;
+  std::vector<uint32_t> dims = {1,static_cast<uint32_t>(N),static_cast<uint32_t>(N),768};
+ util::TensorWrapper input(dims);
+ util::TensorWrapper output(dims);
+
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected(dims);
+  expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_8x8to1x1)
+{
+ util::TensorWrapper input({1,8,8,2048});
+ util::TensorWrapper output({1,1,1,2048});
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 8;
+ int32_t filter_height = 8;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,1,1,2048});
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
diff --git a/libs/kernel/acl/src/cl/Reshape.cpp b/libs/kernel/acl/src/cl/Reshape.cpp
new file mode 100644
index 000000000..e420ab92b
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Reshape.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../Reshape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+static void sync_scheduler() {
+ arm_compute::CLScheduler::get().sync();
+}
+
+bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape,
+ void* outputData, const nnfw::rt::Shape& outputShape) {
+ return common::reshapeGeneric<CLUniqueTensor, arm_compute::CLReshapeLayer>
+ (inputData, inputShape, outputData, outputShape, sync_scheduler);
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Reshape.test.cpp b/libs/kernel/acl/src/cl/Reshape.test.cpp
new file mode 100644
index 000000000..db23a6d3d
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Reshape.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME reshapeGeneric
+#define ACL_TEST(tc, t) TEST(tc, cl_##t)
+
+#include "../Reshape.test.h"
diff --git a/libs/kernel/acl/src/cl/Softmax.cpp b/libs/kernel/acl/src/cl/Softmax.cpp
new file mode 100644
index 000000000..a628f05fe
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Softmax.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../util.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool softmaxFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float beta,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto softmax_f = std::make_shared<arm_compute::CLSoftmaxLayer>();
+ softmax_f->configure(input.ptr(), output.ptr(), beta);
+
+ input.allocate();
+ output.allocate();
+
+ if (inputShape.dimensions.size() == 4)
+ {
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ softmax_f->run();
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+ }
+ else if (inputShape.dimensions.size() == 2)
+ {
+ TensorAccess<MatrixInputAccessor>(input.ref(), inputData, inputShape);
+
+ softmax_f->run();
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape);
+ }
+ else
+ {
+ assert("undefined dimension of input" && 0);
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Softmax.test.cpp b/libs/kernel/acl/src/cl/Softmax.test.cpp
new file mode 100644
index 000000000..8ee8b41e2
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Softmax.test.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/Softmax.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, softmaxFloat32_1xn)
+{
+ float inputData[4];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+ const float beta = 1.0f;
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, softmaxFloat32_4d)
+{
+ float inputData[4];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+ const float beta = 1.0f;
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, softmaxFloat32_1xn_seq)
+{
+ float inputData[4];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+ const float beta = 1.0f;
+ bool bret;
+
+ util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972};
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, softmaxFloat32_4d_seq)
+{
+ float inputData[4];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+ const float beta = 1.0f;
+ bool bret;
+
+ util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972};
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/gtest_env.cpp b/libs/kernel/acl/src/gtest_env.cpp
new file mode 100644
index 000000000..f6fc52f7a
--- /dev/null
+++ b/libs/kernel/acl/src/gtest_env.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+
+class TestEnvironment : public ::testing::Environment
+{
+public:
+ virtual ~TestEnvironment() = default;
+
+ virtual void SetUp()
+ {
+ nnfw::kernel::acl::Initialize();
+ }
+
+ virtual void TearDown()
+ {
+ // DO NOTHING
+ }
+};
+
+static ::testing::Environment* const testingenv =
+ ::testing::AddGlobalTestEnvironment(new TestEnvironment);
diff --git a/libs/kernel/acl/src/neon/Concatenation.cpp b/libs/kernel/acl/src/neon/Concatenation.cpp
new file mode 100644
index 000000000..8738a9d12
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Concatenation.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace neon {
+
+bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs,
+ const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ if (axis != 3)
+ {
+ assert("Only support axis=3 for ACL" && 0);
+ return false;
+ }
+ assert(inputDataPtrs.size() == inputShapes.size());
+
+ std::vector<arm_compute::Tensor*> inputPtrs;
+ std::vector<arm_compute::ITensor*> inputIptrs;
+ arm_compute::Tensor output;
+
+ // init Tensors
+ std::vector<nnfw::rt::Shape>::const_iterator it_inputShape = inputShapes.begin();
+ for (auto inputData : inputDataPtrs)
+ {
+ const nnfw::rt::Shape& inputShape = *it_inputShape;
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::Tensor* inputPtr = new arm_compute::Tensor();
+
+ inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ inputPtrs.push_back(inputPtr);
+ inputIptrs.push_back(inputPtr);
+
+ it_inputShape++;
+ }
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+ output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ // prepare ACL Concatenate and configure tensors
+ auto concat = std::make_shared<arm_compute::NEDepthConcatenateLayer>();
+ concat->configure(inputIptrs, &output);
+
+ // allocate Tensors
+ it_inputShape = inputShapes.begin();
+ std::vector<const float*>::const_iterator it_inputData = inputDataPtrs.begin();
+ for (auto inputPtr : inputPtrs)
+ {
+ inputPtr->allocator()->allocate();
+
+ const float* inputData = *it_inputData;
+ const nnfw::rt::Shape& inputShape = *it_inputShape;
+
+ TensorAccess<InputAccessor>(*inputPtr, inputData, inputShape);
+
+ it_inputShape++;
+ it_inputData++;
+ }
+ output.allocator()->allocate();
+
+ // run
+ concat->run();
+
+ // get output
+ TensorAccess<OutputAccessor>(output, outputData, outputShape);
+
+ // cleanup
+ for (auto inputPtr : inputPtrs)
+ {
+ inputPtr->allocator()->free();
+ delete inputPtr;
+ }
+ output.allocator()->free();
+
+ return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/Concatenation.test.cpp b/libs/kernel/acl/src/neon/Concatenation.test.cpp
new file mode 100644
index 000000000..03b05bd24
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Concatenation.test.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Concatenation.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, neon_concatFloat32_1)
+{
+ float inputData_1[6] = {
+ 1, 2, 3, 4, 5, 6 // [ [ [1],[2],[3] ], [ [4],[5],[6] ] ]
+ };
+ float inputData_2[6] = {
+ 7, 8, 9, 10, 11, 12 // [ [ [7],[8],[9] ], [ [10],[11],[12] ] ]
+ };
+ const nnfw::rt::Shape inputShape_1 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 };
+ const nnfw::rt::Shape inputShape_2 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 };
+ std::vector<const float*> inputDataPtrs;
+ std::vector<nnfw::rt::Shape> inputShapes;
+ float outputData[12];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,3,2}, 1.0, 0 };
+ bool bret;
+
+ inputDataPtrs.push_back(inputData_1);
+ inputDataPtrs.push_back(inputData_2);
+ inputShapes.push_back(inputShape_1);
+ inputShapes.push_back(inputShape_2);
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::concatenationFloat32(inputDataPtrs, inputShapes, 3,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectNCHW[] = {
+ 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12
+ };
+ float expectData[12]; // [ [ [1,7],[2,8],[3,9] ], [ [4,10],[5,11],[6,12] ] ]
+ util::NCHW2NHWC(expectNCHW, expectData, outputShape);
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/neon/Conv2D.cpp b/libs/kernel/acl/src/neon/Conv2D.cpp
new file mode 100644
index 000000000..679ecfced
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Conv2D.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <util/environment.h>
+
+#include "../IO_accessor.h"
+#include "../util.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+#include "../support.h"
+
+#include "util/feature/TextFormatter.h"
+
+#include "support/nnapi/feature/Reader.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace neon {
+
+static int verbose = 0;
+
+bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape);
+ arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+ arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+ NEUniqueTensor bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+ NEUniqueTensor filter(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ auto conv_f = std::make_shared<arm_compute::NEConvolutionLayer>();
+
+ conv_f->configure(input.ptr(), filter.ptr(), bias.ptr(), output.ptr(), conv_info);
+
+ fns.emplace_back(conv_f);
+
+ util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+ bias.allocate();
+ filter.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+ TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape);
+ TensorAccess<WeightAccessor>(filter.ref(), filterData, filterShape);
+
+ nnfw::util::env::IntAccessor("CONV2D_VERBOSE").access(verbose);
+ if (verbose)
+ {
+ auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape);
+ nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData};
+ nnfw::support::acl::feature::Reader<float> acl_ifm_reader{ input.ptr() };
+
+ std::cout << "NNAPI IFM:" << std::endl;
+ std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl;
+
+ std::cout << "ARM Compute IFM:" << std::endl;
+ std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl;
+ }
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/Conv2D.test.cpp b/libs/kernel/acl/src/neon/Conv2D.test.cpp
new file mode 100644
index 000000000..6a3de1c43
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Conv2D.test.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Conv2D.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, neon_convFloat32_3x3to1x1)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 10.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x3to3x3)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[9];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {
+ 5.0f, 7.0f, 5.0f,
+ 7.0f, 10.0f, 7.0f,
+ 5.0f, 7.0f, 5.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x3to3x3_RELU)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { -5.0f };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[9];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] =
+ {
+ 0.0f, 1.0f, 0.0f,
+ 1.0f, 4.0f, 1.0f,
+ 0.0f, 1.0f, 0.0f
+ };
+
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x5to3x3)
+{
+ float inputData[15] = {
+ 1,2,3,4,5,
+ 6,7,8,9,10,
+ 11,12,13,14,15
+ };
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,5,1}, 1.0, 0 };
+ float filterData[18] = {
+ 1,1,1, 1,1,1, 1,1,1,
+ 2,2,2, 2,2,2, 2,2,2
+ };
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {2,3,3,1}, 1.0, 0 };
+ float biasData[2] = { 1.0, 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {2}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[30];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,5,2}, 1.0, 0 };
+ bool bret;
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectNCHW[] = {
+ 17.0f, 28.0f, 34.0f, 40.0f, 29.0f,
+ 40.0f, 64.0f, 73.0f, 82.0f, 58.0f,
+ 37.0f, 58.0f, 64.0f, 70.0f, 49.0f,
+
+ 33.0f, 55.0f, 67.0f, 79.0f, 57.0f,
+ 79.0f, 127.0f, 145.0f, 163.0f, 115.0f,
+ 73.0f, 115.0f, 127.0f, 139.0f, 97.0f
+ };
+ float expectData[30];
+ util::NCHW2NHWC(expectNCHW, expectData, outputShape);
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp b/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..bcf56c667
--- /dev/null
+++ b/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/NEON/NEScheduler.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+#include "../DepthwiseConv2D.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace neon {
+static void sync_scheduler() {
+}
+
+bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape) {
+ return common::depthwiseConvFloat32<NEUniqueTensor, arm_compute::NEDepthwiseConvolutionLayer,
+ arm_compute::NEActivationLayer>(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ outputData, outputShape,
+ sync_scheduler);
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp b/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..d729d538e
--- /dev/null
+++ b/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME neon::depthwiseConvFloat32
+#define ACL_TEST(tc, t) TEST(tc, neon_##t)
+
+#include "../DepthwiseConv2D.test.h"
diff --git a/libs/kernel/acl/src/neon/FullyConnected.cpp b/libs/kernel/acl/src/neon/FullyConnected.cpp
new file mode 100644
index 000000000..86229cbf2
--- /dev/null
+++ b/libs/kernel/acl/src/neon/FullyConnected.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/NEON/NEScheduler.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+#include "../FullyConnected.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace neon {
+
+void sync_scheduler() {
+}
+
+bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape) {
+
+ return common::fullyConnectedFloat32<NEUniqueTensor, arm_compute::NEFullyConnectedLayer,
+ arm_compute::NEActivationLayer>(inputData, inputShape,
+ weightsData, weightsShape,
+ biasData, biasShape,
+ activation,
+ outputData, outputShape,
+ sync_scheduler);
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
diff --git a/libs/kernel/acl/src/neon/FullyConnected.test.cpp b/libs/kernel/acl/src/neon/FullyConnected.test.cpp
new file mode 100644
index 000000000..d4c95e4cb
--- /dev/null
+++ b/libs/kernel/acl/src/neon/FullyConnected.test.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME neon::fullyConnectedFloat32
+#define ACL_TEST(tc, t) TEST(tc, neon_##t)
+
+#include "../FullyConnected.test.h"
+
diff --git a/libs/kernel/acl/src/neon/Pooling.cpp b/libs/kernel/acl/src/neon/Pooling.cpp
new file mode 100644
index 000000000..5c58ae0b5
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Pooling.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+
+#include <cassert>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace neon {
+
+bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ arm_compute::PoolingLayerInfo maxpool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::MAX,
+ arm_compute::Size2D(filter_width,filter_height),
+ pad_info, false);
+
+ NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto pool_f = std::make_shared<arm_compute::NEPoolingLayer>();
+ pool_f->configure(input.ptr(), output.ptr(), maxpool_info);
+
+ fns.emplace_back(pool_f);
+
+ util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ arm_compute::PoolingLayerInfo pool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::AVG,
+ arm_compute::Size2D(filter_width,filter_height),
+ pad_info, true);
+
+ NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto pool_f = std::make_shared<arm_compute::NEPoolingLayer>();
+ pool_f->configure(input.ptr(), output.ptr(), pool_info);
+
+ fns.emplace_back(pool_f);
+
+ util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/Pooling.test.cpp b/libs/kernel/acl/src/neon/Pooling.test.cpp
new file mode 100644
index 000000000..4e6593921
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Pooling.test.cpp
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/Pooling.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+// MAX_POOL_2D: 3x3 input holding 1..9, 3x3 filter, stride 1, no padding.
+// Single output element must be the maximum value (9).
+TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to1x1)
+{
+  util::TensorWrapper input({1,3,3,1});
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t filter_width = 3;
+  int32_t filter_height = 3;
+
+  float outputData[1];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+  bool bret;
+
+  // Fill input with 1,2,...,9 in iteration order.
+  float value = 1.0f;
+  input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return value++;
+  });
+
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+  bret = neon::maxPoolFloat32(input.ptr<float>(), inputShape,
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = { 9.0f };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// MAX_POOL_2D with fused RELU: input is -1..-9, so the pooled max is -1
+// and RELU clamps it to 0.
+TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to1x1_RELU)
+{
+  util::TensorWrapper input({1,3,3,1});
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t filter_width = 3;
+  int32_t filter_height = 3;
+
+  float outputData[1];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+  bool bret;
+
+  // All-negative input: -1,-2,...,-9.
+  float value = -1.0f;
+  input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return value--;
+  });
+
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  int32_t activation = ANEURALNETWORKS_FUSED_RELU;
+
+  bret = neon::maxPoolFloat32(input.ptr<float>(), inputShape,
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = { 0.0f };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// MAX_POOL_2D: 3x3 input (1..9), 2x2 filter, stride 2, right/bottom padding 1
+// -> 2x2 output of per-window maxima {5,6,8,9}.
+TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to2x2)
+{
+  util::TensorWrapper input({1,3,3,1});
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  int32_t padding_left = 0;
+  int32_t padding_right = 1;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 2;
+  int32_t stride_height = 2;
+  int32_t filter_width = 2;
+  int32_t filter_height = 2;
+
+  float outputData[4];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 };
+  bool bret;
+
+  float value = 1.0f;
+  input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return value++;
+  });
+
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+  bret = neon::maxPoolFloat32(input.ptr<float>(), inputShape,
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = {
+    5.0f, 6.0f,
+    8.0f, 9.0f
+  };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// Inception-v3-sized MAX_POOL_2D (147x147x64 -> 73x73x64, 3x3/s2, VALID).
+// Constant input of 1.0 must pool to a constant output of 1.0.
+TEST(KernelACL_TC, neon_maxPoolFloat32_147x147to73x73)
+{
+  util::TensorWrapper input({1,147,147,64});
+  util::TensorWrapper output({1,73,73,64});
+
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 2;
+  int32_t stride_height = 2;
+  int32_t filter_width = 3;
+  int32_t filter_height = 3;
+
+  input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 1.0f;
+  });
+
+  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 0.f;
+  });
+
+  int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+  bool bret = neon::maxPoolFloat32(input.ptr<float>(), input.shape(),
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  util::TensorWrapper expected({1,73,73,64});
+  expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 1.0f;
+  });
+
+  EXPECT_EQ(output, expected);
+}
+
+// Inception-v3-sized MAX_POOL_2D (71x71x192 -> 35x35x192, 3x3/s2, VALID).
+// Constant input of 1.0 must pool to a constant output of 1.0.
+TEST(KernelACL_TC, neon_maxPoolFloat32_71x71to35x35)
+{
+  util::TensorWrapper input({1,71,71,192});
+  util::TensorWrapper output({1,35,35,192});
+
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 2;
+  int32_t stride_height = 2;
+  int32_t filter_width = 3;
+  int32_t filter_height = 3;
+
+  input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 1.0f;
+  });
+
+  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 0.f;
+  });
+
+  int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+  bool bret = neon::maxPoolFloat32(input.ptr<float>(), input.shape(),
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  util::TensorWrapper expected({1,35,35,192});
+  expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 1.0f;
+  });
+
+  EXPECT_EQ(output, expected);
+}
+
+// AVERAGE_POOL_2D: 3x3 input of 1..9, 3x3 filter, stride 1, no padding.
+// Single output element must be the mean, (1+...+9)/9 = 5.
+TEST(KernelACL_TC, neon_averagePoolFloat32_3x3to1x1)
+{
+  util::TensorWrapper input({1,3,3,1});
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t filter_width = 3;
+  int32_t filter_height = 3;
+
+  float outputData[1];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+  bool bret;
+
+  float value = 1.0f;
+  input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return value++;
+  });
+
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+  bret = neon::averagePoolFloat32(input.ptr<float>(), inputShape,
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = { 5.0f };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// AVERAGE_POOL_2D with fused RELU: input is 3,2,...,-5, whose mean is -1,
+// so RELU clamps the single output to 0.
+TEST(KernelACL_TC, neon_averagePoolFloat32_3x3to1x1_RELU)
+{
+  util::TensorWrapper input({1,3,3,1});
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t filter_width = 3;
+  int32_t filter_height = 3;
+
+  float outputData[1];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+  bool bret;
+
+  // Decreasing values 3,2,1,0,-1,...,-5.
+  float value = 3.0f;
+  input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return value--;
+  });
+
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  int32_t activation = ANEURALNETWORKS_FUSED_RELU;
+
+  bret = neon::averagePoolFloat32(input.ptr<float>(), inputShape,
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = { 0.0f };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// AVERAGE_POOL_2D: 3x3 input (1..9), 2x2 filter, stride 1, no padding
+// -> 2x2 output of per-window means {3,4,6,7}.
+TEST(KernelACL_TC, neon_averagePoolFloat32_3x3to2x2)
+{
+  util::TensorWrapper input({1,3,3,1});
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t filter_width = 2;
+  int32_t filter_height = 2;
+
+  float outputData[4];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 };
+  bool bret;
+
+  float value = 1.0f;
+  input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return value++;
+  });
+
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+  bret = neon::averagePoolFloat32(input.ptr<float>(), inputShape,
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = {
+    3.0f, 4.0f,
+    6.0f, 7.0f
+  };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// SAME-padding AVERAGE_POOL_2D (35x35x192, 3x3/s1, pad 1 on every side).
+// With exclude-padding semantics a constant 1.0 input stays exactly 1.0,
+// including at the borders where the window is smaller.
+TEST(KernelACL_TC, neon_averagePoolFloat32_35x35to35x35)
+{
+  std::vector<uint32_t> dims = {1,35,35,192};
+  util::TensorWrapper input(dims);
+  util::TensorWrapper output(dims);
+
+  int32_t padding_left = 1;
+  int32_t padding_right = 1;
+  int32_t padding_top = 1;
+  int32_t padding_bottom = 1;
+  int32_t stride_width = 1;
+  int32_t stride_height = 1;
+  int32_t filter_width = 3;
+  int32_t filter_height = 3;
+
+  input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 1.0f;
+  });
+
+  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 0.f;
+  });
+
+  int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+  bool bret = neon::averagePoolFloat32(input.ptr<float>(), input.shape(),
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  util::TensorWrapper expected(dims);
+  expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 1.0f;
+  });
+
+  EXPECT_EQ(output, expected);
+}
+
+// Global AVERAGE_POOL_2D (8x8x2048 -> 1x1x2048, filter covers whole plane).
+// Constant input of 1.0 must average to exactly 1.0 per channel.
+TEST(KernelACL_TC, neon_averagePoolFloat32_8x8to1x1)
+{
+  util::TensorWrapper input({1,8,8,2048});
+  util::TensorWrapper output({1,1,1,2048});
+
+  int32_t padding_left = 0;
+  int32_t padding_right = 0;
+  int32_t padding_top = 0;
+  int32_t padding_bottom = 0;
+  int32_t stride_width = 2;
+  int32_t stride_height = 2;
+  int32_t filter_width = 8;
+  int32_t filter_height = 8;
+
+  input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 1.0f;
+  });
+
+  output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 0.f;
+  });
+
+  int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+  bool bret = neon::averagePoolFloat32(input.ptr<float>(), input.shape(),
+                        padding_left, padding_right,
+                        padding_top, padding_bottom,
+                        stride_width, stride_height,
+                        filter_width, filter_height,
+                        activation,
+                        output.ptr<float>(), output.shape());
+  EXPECT_EQ(bret, true);
+
+  util::TensorWrapper expected({1,1,1,2048});
+  expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+    return 1.0f;
+  });
+
+  EXPECT_EQ(output, expected);
+}
diff --git a/libs/kernel/acl/src/neon/Reshape.cpp b/libs/kernel/acl/src/neon/Reshape.cpp
new file mode 100644
index 000000000..cef84c7f3
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Reshape.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+#include "../Reshape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace neon {
+
+// Post-run synchronization hook passed to common::reshapeGeneric.
+// Bug fix: this NEON translation unit previously called
+// arm_compute::CLScheduler::get().sync(), which belongs to the OpenCL
+// backend (and whose header is not even included here). NEON functions
+// execute synchronously on the CPU, so there is nothing to flush — the
+// hook is intentionally a no-op.
+static void sync_scheduler() {
+}
+
+// NEON entry point for NNAPI RESHAPE: delegates to the backend-agnostic
+// common::reshapeGeneric, instantiated with the NEON tensor wrapper and
+// arm_compute::NEReshapeLayer. Returns the common implementation's result.
+bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape,
+                    void* outputData, const nnfw::rt::Shape& outputShape) {
+  return common::reshapeGeneric<NEUniqueTensor, arm_compute::NEReshapeLayer>
+           (inputData, inputShape, outputData, outputShape, sync_scheduler);
+}
+
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
diff --git a/libs/kernel/acl/src/neon/Reshape.test.cpp b/libs/kernel/acl/src/neon/Reshape.test.cpp
new file mode 100644
index 000000000..9aca45e7e
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Reshape.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME neon::reshapeGeneric
+#define ACL_TEST(tc, t) TEST(tc, neon_##t)
+
+#include "../Reshape.test.h"
diff --git a/libs/kernel/acl/src/neon/Softmax.cpp b/libs/kernel/acl/src/neon/Softmax.cpp
new file mode 100644
index 000000000..79d614418
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Softmax.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../util.h"
+#include "../NEUniqueTensor.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace neon {
+
+// Runs NNAPI SOFTMAX on the NEON backend.
+// Supports 2-D (1xN, mapped to an ACL vector) and 4-D inputs; `beta` scales
+// the logits before exponentiation. Returns false (after asserting in debug
+// builds) for any other rank.
+bool softmaxFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+                    const float beta,
+                    float* outputData, const nnfw::rt::Shape& outputShape)
+{
+  arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+  arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+  NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+  NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+  auto softmax_f = std::make_shared<arm_compute::NESoftmaxLayer>();
+  softmax_f->configure(input.ptr(), output.ptr(), beta);
+
+  input.allocate();
+  output.allocate();
+
+  // The accessor used for copy-in/copy-out depends on the tensor rank.
+  if (inputShape.dimensions.size() == 4)
+  {
+    TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+    softmax_f->run();
+
+    TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+  }
+  else if (inputShape.dimensions.size() == 2)
+  {
+    // Softmax comes with 1xN matrix and this is translated to N vector in arm_compute::TensorShape
+    TensorAccess<VectorInputAccessor>(input.ref(), inputData, inputShape);
+
+    softmax_f->run();
+
+    TensorAccess<VectorOutputAccessor>(output.ref(), outputData, outputShape);
+  }
+  else
+  {
+    // Unsupported rank (1-D / 3-D): fail loudly in debug, report failure in release.
+    assert("undefined dimension of input" && 0);
+    return false;
+  }
+
+  return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/Softmax.test.cpp b/libs/kernel/acl/src/neon/Softmax.test.cpp
new file mode 100644
index 000000000..988f55078
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Softmax.test.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/Softmax.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+// SOFTMAX over a 2-D 1x4 input of equal logits: every probability is 0.25.
+// Exercises the vector (2-D) accessor path.
+TEST(KernelACL_TC, neon_softmaxFloat32_1xn)
+{
+  float inputData[4];
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+  float outputData[4];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+  const float beta = 1.0f;
+  bool bret;
+
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// SOFTMAX over a 4-D {1,1,4,1} input of equal logits: every probability is
+// 0.25. Exercises the 4-D accessor path.
+TEST(KernelACL_TC, neon_softmaxFloat32_4d)
+{
+  float inputData[4];
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+  float outputData[4];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+  const float beta = 1.0f;
+  bool bret;
+
+  util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f };
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// SOFTMAX over increasing logits 1..4 on a 2-D 1x4 input.
+// Bug fix: this "1xn" test previously used the 4-D shape {1,1,4,1}, making it
+// an exact duplicate of neon_softmaxFloat32_4d_seq and leaving the 2-D code
+// path untested for non-uniform input. The expected probabilities are the
+// same for either shape.
+TEST(KernelACL_TC, neon_softmaxFloat32_1xn_seq)
+{
+  float inputData[4];
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+  float outputData[4];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+  const float beta = 1.0f;
+  bool bret;
+
+  util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972};
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
+
+// SOFTMAX over increasing logits 1..4 on a 4-D {1,1,4,1} input; expected
+// values are softmax([1,2,3,4]).
+TEST(KernelACL_TC, neon_softmaxFloat32_4d_seq)
+{
+  float inputData[4];
+  const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+  float outputData[4];
+  const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+  const float beta = 1.0f;
+  bool bret;
+
+  util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+  util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+  bret = neon::softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+  EXPECT_EQ(bret, true);
+
+  float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972};
+  bret = util::compareData(outputData, expectData, outputShape);
+  EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/shape.cpp b/libs/kernel/acl/src/shape.cpp
new file mode 100644
index 000000000..3c976ae94
--- /dev/null
+++ b/libs/kernel/acl/src/shape.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+
+#include "shape.h"
+
+namespace nnfw {
+namespace rt {
+
+// TODO remove from this source and use it from runtime
+// TODO remove from this source and use it from runtime
+// Returns the extent of dimension `dimensionIdx` of `shape`, or 0 when the
+// index is out of range (the error is currently silent — see TODO below).
+uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) {
+  if (dimensionIdx >= shape.dimensions.size()) {
+    // TODO, log the error
+    return 0;
+  }
+  return shape.dimensions[dimensionIdx];
+}
+
+} // namespace rt
+} // namespace nnfw
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace util {
+
+// Converts a 1-D NNAPI shape to an ACL TensorShape (a plain length-N vector).
+// Asserts rank == 1 in debug builds.
+arm_compute::TensorShape fromVectorNNShape(const nnfw::rt::Shape& shape)
+{
+  assert(shape.dimensions.size() == 1);
+
+  const uint32_t len = nnfw::rt::getSizeOfDimension(shape, 0);
+
+  return arm_compute::TensorShape(len);
+}
+
+// Converts a 2-D NNAPI shape (row-major NxC) to an ACL TensorShape.
+// ACL orders dimensions fastest-first, so the NNAPI (N, C) pair becomes (C, N).
+arm_compute::TensorShape fromMatrixNNShape(const nnfw::rt::Shape& shape)
+{
+  assert(shape.dimensions.size() == 2);
+
+  const uint32_t n = nnfw::rt::getSizeOfDimension(shape, 0);
+  const uint32_t c = nnfw::rt::getSizeOfDimension(shape, 1);
+
+  return arm_compute::TensorShape(c, n);
+}
+
+// Converts an NNAPI shape of rank 1, 2 or 4 to an ACL TensorShape.
+// Rank-4 NHWC is remapped to ACL's fastest-first ordering (W, H, C, N).
+// Rank-3 shapes are not handled yet and will hit the assert below.
+arm_compute::TensorShape fromNNShape(const nnfw::rt::Shape& shape)
+{
+  if( shape.dimensions.size() == 1 )
+    return fromVectorNNShape(shape);
+  else if( shape.dimensions.size() == 2 )
+    return fromMatrixNNShape(shape);
+
+  // TODO: need to treat 3D tensors.
+
+  assert(shape.dimensions.size() == 4);
+
+  // NNAPI assumes the following ordering:
+  //
+  //  dim(0) -> N
+  //  dim(1) -> H
+  //  dim(2) -> W
+  //  dim(3) -> C
+  //
+  uint32_t c = nnfw::rt::getSizeOfDimension(shape, 3);
+  uint32_t h = nnfw::rt::getSizeOfDimension(shape, 1);
+  uint32_t w = nnfw::rt::getSizeOfDimension(shape, 2);
+  uint32_t n = nnfw::rt::getSizeOfDimension(shape, 0);
+
+  return arm_compute::TensorShape(w, h, c, n);
+}
+
+} // namespace util
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/shape.h b/libs/kernel/acl/src/shape.h
new file mode 100644
index 000000000..902115ebd
--- /dev/null
+++ b/libs/kernel/acl/src/shape.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_SHAPE_H__
+#define __NNFW_KERNEL_ACL_SHAPE_H__
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/IFunction.h>
+#include <cassert>
+
+namespace nnfw {
+namespace rt {
+
+// TODO remove from this source and use it from runtime
+uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx);
+
+} // namespace rt
+} // namespace nnfw
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace util {
+
+arm_compute::TensorShape fromVectorNNShape(const nnfw::rt::Shape& shape);
+arm_compute::TensorShape fromNNShape(const nnfw::rt::Shape& shape);
+
+// Appends an activation function to `fns` that applies the NNAPI fused
+// activation `activation` in-place on tensor `out`.
+//
+// TensorT must expose ptr() (the ACL tensor) and ActT must be the matching
+// backend activation layer (e.g. NEActivationLayer / CLActivationLayer).
+// For ANEURALNETWORKS_FUSED_NONE nothing is appended.
+//
+// Bug fix: the previous version fell through on an unknown activation code —
+// the assert is compiled out under NDEBUG — and registered an UNCONFIGURED
+// activation layer, which would crash when run. It also allocated the layer
+// even for FUSED_NONE. The layer is now created and registered only after a
+// valid activation has been mapped.
+template<class TensorT, class ActT>
+void insertFusedActivationLayer(TensorT& out, int activation,
+    std::vector<std::shared_ptr<arm_compute::IFunction>>& fns) {
+  arm_compute::ActivationLayerInfo act_info;
+
+  switch(activation) {
+  case ANEURALNETWORKS_FUSED_NONE:
+    // DO NOTHING
+    return;
+
+  case ANEURALNETWORKS_FUSED_RELU:
+    act_info = arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+    break;
+
+  case ANEURALNETWORKS_FUSED_RELU1:
+    // RELU1 == min(max(x, -1), 1); ACL's BOUNDED_RELU(a) caps at a.
+    act_info = arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 1.f);
+    break;
+
+  case ANEURALNETWORKS_FUSED_RELU6:
+    act_info = arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);
+    break;
+
+  default:
+    assert("Undefined activation type." && 0);
+    return; // never register an unconfigured layer
+  }
+
+  auto act_f = std::make_shared<ActT>();
+
+  // Do in-place update
+  act_f->configure(out.ptr(), nullptr, act_info);
+
+  fns.emplace_back(act_f);
+}
+
+} // namespace util
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_SHAPE_H__
diff --git a/libs/kernel/acl/src/support.cpp b/libs/kernel/acl/src/support.cpp
new file mode 100644
index 000000000..d04aef59e
--- /dev/null
+++ b/libs/kernel/acl/src/support.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support.h"
+
+namespace nnfw
+{
+namespace support
+{
+namespace nnapi
+{
+namespace feature
+{
+
+// TODO Extract this function as utility function
+// NOTE It is not a good design to access nnfw::rt::Shape nnfw_support_nnapi lib
+// TODO Extract this function as utility function
+// NOTE It is not a good design to access nnfw::rt::Shape nnfw_support_nnapi lib
+// Converts a rank-4 NNAPI NHWC shape to an nnfw feature Shape{C,H,W}.
+// Only batch size 1 is supported (asserted below); the batch dimension is
+// dropped from the result.
+nnfw::util::feature::Shape asFeatureShape(const nnfw::rt::Shape& shape)
+{
+  // NNAPI assumes the following ordering:
+  //
+  //  dim(0) -> N
+  //  dim(1) -> H
+  //  dim(2) -> W
+  //  dim(3) -> C
+  //
+  int32_t c = nnfw::rt::getSizeOfDimension(shape, 3);
+  int32_t h = nnfw::rt::getSizeOfDimension(shape, 1);
+  int32_t w = nnfw::rt::getSizeOfDimension(shape, 2);
+
+  assert(nnfw::rt::getSizeOfDimension(shape, 0) == 1);
+
+  return nnfw::util::feature::Shape{c, h, w};
+}
+
+} // namespace feature
+} // namespace nnapi
+} // namespace support
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/support.h b/libs/kernel/acl/src/support.h
new file mode 100644
index 000000000..751d2c6cb
--- /dev/null
+++ b/libs/kernel/acl/src/support.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_SUPPORT_H_TEMPORARY__
+#define __NNFW_KERNEL_SUPPORT_H_TEMPORARY__
+
+// NOTE these are not decided yet but need to be moved out from Conv2D
+// to separate NEON implementation to it's folder
+// TODO move to some folder where it should be
+
+#include <cassert>
+
+#include "util/feature/Shape.h"
+
+#include <OperationsUtils.h>
+
+namespace nnfw
+{
+namespace support
+{
+namespace nnapi
+{
+namespace feature
+{
+
+// TODO Extract this function as utility function
+// NOTE It is not a good design to access nnfw::rt::Shape nnfw_support_nnapi lib
+nnfw::util::feature::Shape asFeatureShape(const nnfw::rt::Shape& shape);
+
+} // namespace feature
+} // namespace nnapi
+} // namespace support
+} // namespace nnfw
+
+#include <arm_compute/core/ITensor.h>
+
+#include "util/feature/Reader.h"
+
+namespace nnfw
+{
+namespace support
+{
+namespace acl
+{
+namespace feature
+{
+
+// Read-only accessor over an ACL feature tensor; only the float
+// specialization below is provided.
+template<typename T> class Reader;
+
+// Non-owning float reader: adapts an arm_compute::ITensor to the
+// nnfw::util::feature::Reader<float> interface. The tensor must be F32
+// (asserted in the constructor) and must outlive this Reader.
+template<> class Reader<float> final : public nnfw::util::feature::Reader<float>
+{
+public:
+  Reader(arm_compute::ITensor *tensor) : _tensor{tensor}
+  {
+    assert(tensor->info()->data_type() == arm_compute::DataType::F32);
+  }
+
+public:
+  // Returns the element at (channel, row, column).
+  float at(uint32_t ch, uint32_t row, uint32_t col) const override
+  {
+    return *ptr_to_element(ch, row, col);
+  }
+
+private:
+  float *ptr_to_element(uint32_t ch, uint32_t row, uint32_t col) const
+  {
+    // ARM Compute uses CHW ordering
+    return reinterpret_cast<float *>(_tensor->ptr_to_element(arm_compute::Coordinates{col, row, ch}));
+  }
+
+private:
+  arm_compute::ITensor *_tensor;  // non-owning
+};
+
+} // namespace feature
+} // namespace acl
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_SUPPORT_H_TEMPORARY__
diff --git a/libs/kernel/acl/src/util.cpp b/libs/kernel/acl/src/util.cpp
new file mode 100644
index 000000000..7e5df534e
--- /dev/null
+++ b/libs/kernel/acl/src/util.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <util/fp32.h>
+
+#include "util.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace util {
+
// Fill `num` consecutive floats starting at `data` with `value`.
void initData(float* data, int num, float value)
{
  for (int i = 0; i < num; ++i)
    data[i] = value;
}
+
// Fill `num` consecutive floats with value, value+1, value+2, ...
// The running value is advanced by repeated float addition, matching the
// original increment semantics exactly (relevant near float precision limits).
void initData_Increasing(float* data, int num, float value)
{
  float v = value;
  for (int i = 0; i < num; ++i) {
    data[i] = v;
    v += 1.0f;
  }
}
+
+// compareData
+// return true if result == expected with the shape info,
+// otherwise false
+bool compareData(const float* result, const float* expected, const nnfw::rt::Shape& shape)
+{
+ if (shape.dimensions.size() == 4)
+ {
+ // TODO fix indentation
+ uint32_t height = nnfw::rt::getSizeOfDimension(shape, 1);
+ uint32_t width = nnfw::rt::getSizeOfDimension(shape, 2);
+ uint32_t numitems = height * width;
+ for (int item = 0; item < numitems; item++) {
+ if (!::nnfw::util::fp32::epsilon_equal(*(result + item), *(expected + item), 1)) {
+ LOG(ERROR) << "compareData failed: result " << *(result + item)
+ << ", expected " << *(expected + item)
+ << ", diff " << ::nnfw::util::fp32::relative_diff(*(result + item), *(expected + item))
+ << std::endl;
+ return false;
+ }
+ }
+ }
+ else if (shape.dimensions.size() == 2)
+ {
+ uint32_t height = nnfw::rt::getSizeOfDimension(shape, 0);
+ uint32_t width = nnfw::rt::getSizeOfDimension(shape, 1);
+ uint32_t numitems = height * width;
+ for (int item = 0; item < numitems; item++) {
+ if (!::nnfw::util::fp32::epsilon_equal(*(result + item), *(expected + item), 1)) {
+ LOG(ERROR) << "compareData failed: result " << *(result + item)
+ << ", expected " << *(expected + item)
+ << ", diff " << ::nnfw::util::fp32::relative_diff(*(result + item), *(expected + item))
+ << std::endl;
+ return false;
+ }
+ }
+ }
+ else
+ {
+ // TODO: add a handler for rank 1 and 3
+ LOG(ERROR) << "Unhandled shape: " << shape.dimensions.size() << std::endl;
+ }
+ return true;
+}
+
+void NCHW2NHWC(const float* nchw, float* nhwc, const nnfw::rt::Shape& shape)
+{
+ uint32_t N = nnfw::rt::getSizeOfDimension(shape, 0);
+ uint32_t H = nnfw::rt::getSizeOfDimension(shape, 1);
+ uint32_t W = nnfw::rt::getSizeOfDimension(shape, 2);
+ uint32_t C = nnfw::rt::getSizeOfDimension(shape, 3);
+
+ for (uint32_t n = 0; n < N; n++) {
+ for (uint32_t c = 0; c < C; c++) {
+ for (uint32_t h = 0; h < H; h++) {
+ for (uint32_t w = 0; w < W; w++) {
+ uint32_t soffset = w + (h * W) + (c * W * H) + (n * W * H * C);
+ uint32_t doffset = c + (w * C) + (h * C * W) + (n * C * W * H);
+ *(nhwc + doffset) = *(nchw + soffset);
+ }
+ }
+ }
+ }
+}
+
+} // namespace util
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/util.h b/libs/kernel/acl/src/util.h
new file mode 100644
index 000000000..48ed02783
--- /dev/null
+++ b/libs/kernel/acl/src/util.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_UTIL_H__
+#define __NNFW_KERNEL_ACL_UTIL_H__
#include <OperationsUtils.h>

#include <cassert>
#include <cmath>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace util {
+
+// TODO: make a separate module.
+class TensorWrapper {
+public:
+ TensorWrapper(std::vector<uint32_t> dims,
+ OperandType type = OperandType::FLOAT32,
+ float scale = 1.0,
+ int32_t offset = 0)
+ :_shape{type, dims, scale, offset}
+ {
+
+ // currently, we support only FLOAT32 for now.
+ assert( type == OperandType::FLOAT32);
+
+ uint32_t size_bytes = sizeof(float);
+
+ _num_elems = 1;
+ for( auto& d: dims ) {
+ _num_elems *= d;
+ }
+
+ _data = new uint8_t[_num_elems * size_bytes];
+ }
+
+ ~TensorWrapper() {
+ delete [] _data;
+ }
+
+ const nnfw::rt::Shape shape() const {
+ return _shape;
+ }
+
+ uint32_t num_elems() const { return _num_elems; }
+
+ template<class T>
+ T at(const uint32_t& idx) const {
+ return reinterpret_cast<T*>(_data)[idx];
+ }
+
+ template<class T>
+ T& at(const uint32_t& idx) {
+ return reinterpret_cast<T*>(_data)[idx];
+ }
+
+ template<class T>
+ T* ptr() { return reinterpret_cast<T*>(_data); }
+
+ void initValue(float f) {
+ for( uint32_t i = 0; i < _num_elems; ++i ) {
+ at<float>(i) = f;
+ }
+ }
+
+ typedef std::function<float(uint32_t n, uint32_t c, uint32_t h, uint32_t w)> funcInit4;
+ void initValue(funcInit4 f) {
+ assert(_shape.dimensions.size() == 4);
+
+ int N = _shape.dimensions[0];
+ int H = _shape.dimensions[1];
+ int W = _shape.dimensions[2];
+ int C = _shape.dimensions[3];
+
+ for(int n = 0; n < N; ++n) {
+ for(int h = 0; h < H; ++h) {
+ for(int w = 0; w < W; ++w) {
+ for(int c = 0; c < C; ++c) {
+ uint32_t offset = n*H*W*C + h*W*C + w*C + c;
+ at<float>(offset) = f(n,c,h,w);
+ }
+ }
+ }
+ }
+ }
+
+ typedef std::function<float(uint32_t c, uint32_t h, uint32_t w)> funcInit3;
+ void initValue(funcInit3 f) {
+ assert(_shape.dimensions.size() == 3);
+
+ int C = _shape.dimensions[0];
+ int H = _shape.dimensions[1];
+ int W = _shape.dimensions[2];
+
+ for(int h = 0; h < H; ++h) {
+ for(int w = 0; w < W; ++w) {
+ for(int c = 0; c < C; ++c) {
+ uint32_t offset = h*W*C + w*C + c;
+ at<float>(offset) = f(c,h,w);
+ }
+ }
+ }
+ }
+
+ typedef std::function<float(uint32_t h, uint32_t w)> funcInit2;
+ void initValue(funcInit2 f) {
+ assert(_shape.dimensions.size() == 2);
+
+ int H = _shape.dimensions[0];
+ int W = _shape.dimensions[1];
+
+ for(int h = 0; h < H; ++h) {
+ for(int w = 0; w < W; ++w) {
+ uint32_t offset = h*W + w;
+ at<float>(offset) = f(h,w);
+ }
+ }
+ }
+
+ typedef std::function<float(uint32_t w)> funcInit1;
+ void initValue(funcInit1 f) {
+ assert(_shape.dimensions.size() == 1);
+
+ int W = _shape.dimensions[0];
+
+ for(int w = 0; w < W; ++w) {
+ uint32_t offset = w;
+ at<float>(offset) = f(w);
+ }
+ }
+
+ void initValue(std::vector<float> v) {
+ assert(v.size() == _num_elems);
+ for( uint32_t i = 0; i < _num_elems; ++i ) {
+ at<float>(i) = v[i];
+ }
+ }
+
+ bool operator==(const TensorWrapper &t) const {
+ // compare the shape
+ assert(num_elems() == t.num_elems());
+ assert(_shape.type == t.shape().type);
+ assert(_shape.scale == t.shape().scale);
+ assert(_shape.offset == t.shape().offset);
+ assert(_shape.dimensions == t.shape().dimensions);
+
+ // currently, we support only FLOAT32.
+ assert(_shape.type == OperandType::FLOAT32);
+
+ for( uint32_t i = 0; i < _num_elems; ++i ) {
+ if( std::fabs(static_cast<float>(at<float>(i) - t.at<float>(i))) > 0.001f ) {
+ std::cout << "Comparing [" << i << "] " << at<float>(i) << "," << t.at<float>(i) << std::endl;
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+private:
+ nnfw::rt::Shape _shape;
+ uint32_t _num_elems;
+ uint8_t* _data;
+};
+
+void initData(float* data, int num, float value);
+bool compareData(const float* result, const float* expected, const nnfw::rt::Shape& shape);
+void initData_Increasing(float* data, int num, float value);
+
+void NCHW2NHWC(const float* nchw, float* nhwc, const nnfw::rt::Shape& shape);
+
+} // namespace util
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_UTIL_H__