diff options
Diffstat (limited to 'libs/kernel/acl')
47 files changed, 5248 insertions, 0 deletions
# Build script for the ACL-backed kernel library (kernelacl) and its
# gtest-based unit-test executable (kernelacl_test).
set(LIB_KERNELACL kernelacl)
set(LIB_KERNELACL_TEST kernelacl_test)

# TODO remove this when default goes to c++14
if(CMAKE_VERSION VERSION_LESS 3.1.0)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
else()
  set(CMAKE_CXX_STANDARD 14)
endif()

# runtime information
set(PATH_RUNTIME_NN ${CMAKE_SOURCE_DIR}/runtimes/nn)
set(RUNTIME_INCLUDES ${PATH_RUNTIME_NN}/common/include
                     ${PATH_RUNTIME_NN}/runtime/include
                     ${PATH_RUNTIME_NN}/depend/hal/include
                     ${PATH_RUNTIME_NN}/depend/libhidl/base/include
                     ${PATH_RUNTIME_NN}/depend/libcutils/include
                     ${PATH_RUNTIME_NN}/depend/libutils/include
                     ${PATH_RUNTIME_NN}/depend/android-base/include
                     )

# common
link_directories(${CMAKE_INSTALL_PREFIX}/lib)

# kernel library
set(KERNELACL_SRCS "src/Init_acl.cpp"
                   "src/IO_accessor.cpp"
                   "src/shape.cpp"
                   "src/support.cpp"
                   "src/cl/Conv2D.cpp"
                   "src/cl/DepthwiseConv2D.cpp"
                   "src/cl/FullyConnected.cpp"
                   "src/cl/Pooling.cpp"
                   "src/cl/Reshape.cpp"
                   "src/cl/Softmax.cpp"
                   "src/cl/Concatenation.cpp"
                   "src/neon/Conv2D.cpp"
                   "src/neon/DepthwiseConv2D.cpp"
                   "src/neon/FullyConnected.cpp"
                   "src/neon/Pooling.cpp"
                   "src/neon/Softmax.cpp"
                   "src/neon/Reshape.cpp"
                   "src/neon/Concatenation.cpp"
                   )

add_library(${LIB_KERNELACL} SHARED ${KERNELACL_SRCS})
target_include_directories(${LIB_KERNELACL} PUBLIC
                           ${NNFW_INCLUDE_DIR}
                           ${RUNTIME_INCLUDES}
                           ${NNFW_ACL_INCLUDES}
                           ${CMAKE_SOURCE_DIR}/include
                           )
target_link_libraries(${LIB_KERNELACL} nnfw_support_nnapi)
target_link_libraries(${LIB_KERNELACL} nnfw_util ${NNFW_ACL_LIBS})
# TARGET_OS is quoted so that an unset/empty value compares as "" instead of
# collapsing the condition into a malformed `if( STREQUAL "tizen")`.
if("${TARGET_OS}" STREQUAL "tizen")
  # Tizen additionally links OpenCL explicitly (appended after the ACL libs,
  # same link order as before).
  target_link_libraries(${LIB_KERNELACL} OpenCL)
endif()
install(TARGETS ${LIB_KERNELACL} DESTINATION lib)

# kernel test executable
set(KERNELACL_TEST_SRCS "src/util.cpp"
                        "src/gtest_env.cpp"
                        "src/cl/Conv2D.test.cpp"
                        "src/cl/DepthwiseConv2D.test.cpp"
                        "src/cl/FullyConnected.test.cpp"
                        "src/cl/Pooling.test.cpp"
                        "src/cl/Reshape.test.cpp"
                        "src/cl/Softmax.test.cpp"
                        "src/cl/Concatenation.test.cpp"
                        "src/neon/Conv2D.test.cpp"
                        "src/neon/DepthwiseConv2D.test.cpp"
                        "src/neon/FullyConnected.test.cpp"
                        "src/neon/Pooling.test.cpp"
                        "src/neon/Softmax.test.cpp"
                        "src/neon/Reshape.test.cpp"
                        "src/neon/Concatenation.test.cpp"
                        )

add_executable(${LIB_KERNELACL_TEST} ${KERNELACL_TEST_SRCS})
target_include_directories(${LIB_KERNELACL_TEST} PUBLIC
                           ${NNFW_INCLUDE_DIR}
                           ${RUNTIME_INCLUDES}
                           ${NNFW_ACL_INCLUDES}
                           )
# On non-Tizen targets the test binary depends on the `googletest` target.
if(NOT "${TARGET_OS}" STREQUAL "tizen")
  add_dependencies(${LIB_KERNELACL_TEST} googletest)
endif()
target_link_libraries(${LIB_KERNELACL_TEST}
                      ${LIB_KERNELACL}
                      nnfw_util ${NNFW_ACL_LIBS}
                      ${NNFW_GTEST_LIBS}
                      )
install(TARGETS ${LIB_KERNELACL_TEST} DESTINATION unittest)
+ */ + +#ifndef __NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__ +#define __NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__ + +#include <arm_compute/runtime/CL/CLTensor.h> + +namespace nnfw { +namespace kernel { +namespace acl { + +class CLUniqueTensor +{ +public: + CLUniqueTensor(const ::arm_compute::TensorInfo &info) + { + _tensor.allocator()->init(info); + } + +public: + // Both copy and move are not allowed + CLUniqueTensor(const CLUniqueTensor &) = delete; + CLUniqueTensor(CLUniqueTensor &&) = delete; + +public: + ~CLUniqueTensor() + { + _tensor.allocator()->free(); + } + +public: + void allocate() + { + _tensor.allocator()->allocate(); + } + +public: + ::arm_compute::CLTensor &ref(void) { return _tensor; } + ::arm_compute::CLTensor *ptr(void) { return &_tensor; } + +private: + ::arm_compute::CLTensor _tensor; +}; + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif //__NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__ diff --git a/libs/kernel/acl/src/DepthwiseConv2D.h b/libs/kernel/acl/src/DepthwiseConv2D.h new file mode 100644 index 000000000..8af8d4fd0 --- /dev/null +++ b/libs/kernel/acl/src/DepthwiseConv2D.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__ +#define __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include <arm_compute/runtime/IFunction.h> + +#include "shape.h" +#include "IO_accessor.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +namespace common { + +typedef std::function<void (void)> sync_scheduler_f; + +template<class TensorT, class LayerT, class ActT> +bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* filterData, const nnfw::rt::Shape& filterShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t depth_multiplier, int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape, + sync_scheduler_f sync_scheduler) { + auto inputShapeACL = util::fromNNShape(inputShape); + auto weightsShapeACL = util::fromNNShape(filterShape); + auto biasShapeACL = util::fromNNShape(biasShape); + auto outputShapeACL = util::fromNNShape(outputShape); + + TensorT input(arm_compute::TensorInfo(inputShapeACL, arm_compute::Format::F32)); + TensorT weights(arm_compute::TensorInfo(weightsShapeACL, arm_compute::Format::F32)); + TensorT bias(arm_compute::TensorInfo(biasShapeACL, arm_compute::Format::F32)); + TensorT output(arm_compute::TensorInfo(outputShapeACL, arm_compute::Format::F32)); + + arm_compute::PadStrideInfo psinfo = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + auto l = std::make_shared<LayerT>(); + l->configure(input.ptr(), weights.ptr(), bias.ptr(), output.ptr(), psinfo); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + fns.emplace_back(l); + + 
util::insertFusedActivationLayer<TensorT, ActT>(output, activation, fns); + + input.allocate(); + output.allocate(); + bias.allocate(); + weights.allocate(); + + // TODO: Do we need 2D tensor accessor for the input feature? + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape); + TensorAccess<WeightAccessor>(weights.ref(), filterData, filterShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + sync_scheduler(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +} // namespace common + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__ diff --git a/libs/kernel/acl/src/DepthwiseConv2D.test.h b/libs/kernel/acl/src/DepthwiseConv2D.test.h new file mode 100644 index 000000000..b2c8592ee --- /dev/null +++ b/libs/kernel/acl/src/DepthwiseConv2D.test.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <kernel/acl/DepthwiseConv2D.h> + +// TODO: fix include path in CMakeFiles +#include "util.h" + +#ifndef ACL_TEST +#error "ACL_TEST should be defined first!" +#endif // ACL_TEST + +#ifndef ACL_CORE_FUNC_NAME +#error "ACL_CORE_FUNC_NAME should be defined first!" 
+#endif // ACL_CORE_FUNC_NAME + +using namespace nnfw::kernel::acl; + +ACL_TEST(KernelACL_TC, dwise_conv2d_1) { + uint32_t input_n = 1; + uint32_t input_h = 3; + uint32_t input_w = 3; + uint32_t input_c = 1; + uint32_t filter_h = 3; + uint32_t filter_w = 3; + uint32_t filter_c = 1; + uint32_t out_h = 1; + uint32_t out_w = 1; + + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t depth_multiplier = 1; + + util::TensorWrapper input({input_n, input_h, input_w, input_c}); + util::TensorWrapper weights({1, filter_h, filter_w, filter_c}); + util::TensorWrapper bias({filter_c}); + util::TensorWrapper output({1, out_h, out_w, filter_c}); + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + + input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + uint32_t N = input_n; + uint32_t H = input_h; + uint32_t W = input_w; + uint32_t C = input_c; + + return n*H*W*C + h*W*C + w*C + c; + }); + weights.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + uint32_t N = 1; + uint32_t H = filter_h; + uint32_t W = filter_w; + uint32_t C = filter_c; + + return n*H*W*C + h*W*C + w*C + c; + }); + bias.initValue([](uint32_t w) { + return 0.f; + }); + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(), + weights.ptr<float>(), weights.shape(), + bias.ptr<float>(), bias.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + depth_multiplier, activation, + output.ptr<float>(), output.shape()); + + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1, out_h, out_w, filter_c}); + expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 204.f; + }); + + EXPECT_EQ(output, expected); +} + +ACL_TEST(KernelACL_TC, dwise_conv2d_multi_channel) 
{ + uint32_t input_n = 1; + uint32_t input_h = 3; + uint32_t input_w = 3; + uint32_t input_c = 3; + uint32_t filter_h = 3; + uint32_t filter_w = 3; + uint32_t filter_c = input_c; + uint32_t out_h = 1; + uint32_t out_w = 1; + + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t depth_multiplier = 1; + + util::TensorWrapper input({input_n, input_h, input_w, input_c}); + util::TensorWrapper weights({1, filter_h, filter_w, filter_c}); + util::TensorWrapper bias({filter_c}); + util::TensorWrapper output({1, out_h, out_w, filter_c}); + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + + input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + uint32_t N = input_n; + uint32_t H = input_h; + uint32_t W = input_w; + uint32_t C = input_c; + + return n*H*W*C + h*W*C + w*C + c; + }); + weights.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + uint32_t N = 1; + uint32_t H = filter_h; + uint32_t W = filter_w; + uint32_t C = filter_c; + + return n*H*W*C + h*W*C + w*C + c; + }); + bias.initValue([](uint32_t w) { + return 0.f; + }); + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(), + weights.ptr<float>(), weights.shape(), + bias.ptr<float>(), bias.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + depth_multiplier, activation, + output.ptr<float>(), output.shape()); + + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1, out_h, out_w, filter_c}); + expected.initValue({ + 1836.f, + 2061.f, + 2304.f + }); + + EXPECT_EQ(output, expected); +} + +ACL_TEST(KernelACL_TC, dwise_conv2d_inception_1) { + uint32_t input_n = 1; + uint32_t input_h = 112; + uint32_t input_w = 112; + uint32_t input_c = 32; + uint32_t filter_h = 3; + uint32_t 
filter_w = 3; + uint32_t filter_c = input_c; + uint32_t out_h = 112; + uint32_t out_w = 112; + + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t depth_multiplier = 1; + + util::TensorWrapper input({input_n, input_h, input_w, input_c}); + util::TensorWrapper weights({1, filter_h, filter_w, filter_c}); + util::TensorWrapper bias({filter_c}); + util::TensorWrapper output({1, out_h, out_w, filter_c}); + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU6); + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return c; + }); + weights.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return c; + }); + bias.initValue([](uint32_t w) { + return 0.f; + }); + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(), + weights.ptr<float>(), weights.shape(), + bias.ptr<float>(), bias.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + depth_multiplier, activation, + output.ptr<float>(), output.shape()); + + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1, out_h, out_w, filter_c}); + expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + float v = 9.f; + if( h == 0 || h == out_h-1 ) + v -= 3.f; + if( w == 0 || w == out_w-1 ) + v -= 3.f; + + // four corners + if( (w == 0 && h == 0) + || (w == 0 && h == out_h-1) + || (w == out_w-1 && h == 0) + || (w == out_w-1 && h == out_h-1) ) + v += 1.f; + + // Assumption: negative numbers cannot appear because + // only positive numbers exist in the input and weights. 
+ float ret = c*c*v; + return std::min(ret, 6.f); + }); + + EXPECT_EQ(output, expected); +} diff --git a/libs/kernel/acl/src/FullyConnected.h b/libs/kernel/acl/src/FullyConnected.h new file mode 100644 index 000000000..5030a8548 --- /dev/null +++ b/libs/kernel/acl/src/FullyConnected.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__ +#define __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include <arm_compute/runtime/IFunction.h> + +#include "shape.h" +#include "IO_accessor.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +namespace common { + +typedef std::function<void (void)> sync_scheduler_f; + +template<class TensorT, class LayerT, class ActT> +bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* weightsData, const nnfw::rt::Shape& weightsShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape, + sync_scheduler_f sync_scheduler) { + + // NNAPI specification: https://developer.android.com/ndk/reference/group___neural_networks.html#ggaabbe492c60331b13038e39d4207940e0aaada7a3dbaf4676aba560c933ff610c5 + + // According to the NNAPI Specification, 
+ // INPUT + // 1. input rank is up to 4. + // 2. if input rank > 2, it is flattened to rank 2 [batch_size, input_size] + nnfw::rt::Shape flattenedInputShape = inputShape; + switch(inputShape.dimensions.size()) { + case 1: + { + assert("Need to be implemented." && 0); + break; + } + case 2: + { + // DO NOTHING. + break; + } + case 3: + { + assert("Need to be implemented." && 0); + break; + } + case 4: + { + auto N = inputShape.dimensions[0]; + auto H = inputShape.dimensions[1]; + auto W = inputShape.dimensions[2]; + auto C = inputShape.dimensions[3]; + flattenedInputShape.dimensions = {N, H*W*C}; + break; + } + default: + assert(inputShape.dimensions.size() <= 4); + } + // Finally, flattenedInputShape is a 2D tensor. + + // WEIGHTS is a 2D tensor + assert(weightsShape.dimensions.size() == 2); + + // BIAS is a 1D tensor + assert(biasShape.dimensions.size() == 1); + + // OUTPUT is a 2D tensor. + assert(outputShape.dimensions.size() == 2); + + auto input_shape = util::fromNNShape(flattenedInputShape); + auto weights_shape = util::fromNNShape(weightsShape); + auto bias_shape = util::fromNNShape(biasShape); + auto output_shape = util::fromNNShape(outputShape); + + assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + TensorT input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + TensorT output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + TensorT bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32)); + TensorT weights(arm_compute::TensorInfo(weights_shape, arm_compute::Format::F32)); + + auto fc = std::make_shared<LayerT>(); + fc->configure(input.ptr(), weights.ptr(), bias.ptr(), output.ptr()); + + fns.emplace_back(fc); + + if (ANEURALNETWORKS_FUSED_RELU == activation) + { + auto relu_f = std::make_shared<ActT>(); + + const arm_compute::ActivationLayerInfo 
relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + + // Do in-place update + relu_f->configure(output.ptr(), nullptr, relu_info); + + fns.emplace_back(relu_f); + } + + input.allocate(); + output.allocate(); + bias.allocate(); + weights.allocate(); + + // TODO: Do we need 2D tensor accessor for the input feature? + TensorAccess<MatrixWeightAccessor>(input.ref(), inputData, inputShape); + TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape); + TensorAccess<MatrixWeightAccessor>(weights.ref(), weightsData, weightsShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + sync_scheduler(); + + TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +} // namespace common + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__ diff --git a/libs/kernel/acl/src/FullyConnected.test.h b/libs/kernel/acl/src/FullyConnected.test.h new file mode 100644 index 000000000..01bbff802 --- /dev/null +++ b/libs/kernel/acl/src/FullyConnected.test.h @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <kernel/acl/FullyConnected.h> + +// TODO: fix include path in CMakeFiles +#include "util.h" + +#ifndef ACL_TEST +#error "ACL_TEST should be defined first!" +#endif // ACL_TEST + +#ifndef ACL_CORE_FUNC_NAME +#error "ACL_CORE_FUNC_NAME should be defined first!" +#endif // ACL_CORE_FUNC_NAME + +using namespace nnfw::kernel::acl; +using fullyConnectedFloat32T = bool (*)(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* weightsData, const nnfw::rt::Shape& weightsShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape); + +ACL_TEST(KernelACL_TC, fcFloat32_1) { + + util::TensorWrapper input({1,1,1,100}); + util::TensorWrapper weights({1,100}); + util::TensorWrapper bias({1}); + util::TensorWrapper output({1,1}); + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.f; + }); + weights.initValue([](uint32_t h, uint32_t w) { + return 1.f; + }); + bias.initValue([](uint32_t w) { + return 0.f; + }); + output.initValue([](uint32_t h, uint32_t w) { + return 0.f; + }); + + bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(), + weights.ptr<float>(), weights.shape(), + bias.ptr<float>(), bias.shape(), + activation, + output.ptr<float>(), output.shape()); + + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1,1}); + expected.initValue([](uint32_t h, uint32_t w) { + return 100.f; + }); + + EXPECT_EQ(output, expected); +} + +ACL_TEST(KernelACL_TC, fcFloat32_relu) { + + util::TensorWrapper input({1,1,1,100}); + util::TensorWrapper weights({1,100}); + util::TensorWrapper bias({1}); + util::TensorWrapper output({1,1}); + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + + input.initValue([](uint32_t n, uint32_t c, 
uint32_t h, uint32_t w) { + return 1.f; + }); + weights.initValue([](uint32_t h, uint32_t w) { + return -1.f; + }); + bias.initValue([](uint32_t w) { + return 0.f; + }); + output.initValue([](uint32_t h, uint32_t w) { + return 0.f; + }); + + bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(), + weights.ptr<float>(), weights.shape(), + bias.ptr<float>(), bias.shape(), + activation, + output.ptr<float>(), output.shape()); + + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1,1}); + expected.initValue([](uint32_t h, uint32_t w) { + return 0.f; + }); + + EXPECT_EQ(output, expected); +} + +ACL_TEST(KernelACL_TC, fcFloat32_conv_fc) { + uint32_t input_n = 1; + uint32_t input_c = 5; + uint32_t input_h = 4; + uint32_t input_w = 4; + uint32_t weight_n = 6; + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + + util::TensorWrapper input({input_n, input_h, input_w, input_c}); + util::TensorWrapper weight({weight_n, input_c*input_h*input_w}); + util::TensorWrapper bias({weight_n}); + util::TensorWrapper output({1, weight_n}); + + input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + uint32_t N = input_n; + uint32_t H = input_h; + uint32_t W = input_w; + uint32_t C = input_c; + + return n*H*W*C + h*W*C + w*C + c; + }); + + weight.initValue([&](uint32_t h, uint32_t w) { + uint32_t H = weight_n; + uint32_t W = input_c*input_h*input_w; + + return h*W + w; + }); + + bias.initValue([](uint32_t w) { + return 0.f; + }); + + output.initValue([](uint32_t h, uint32_t w) { + return 0.f; + }); + + bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(), + weight.ptr<float>(), weight.shape(), + bias.ptr<float>(), bias.shape(), + activation, + output.ptr<float>(), output.shape()); + + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1, weight_n}); + expected.initValue({ + 167480.f, + 420280.f, + 673080.f, + 925880.f, + 1178680.f, + 1431480.f}); + + EXPECT_EQ(output, expected); +} + +ACL_TEST(KernelACL_TC, 
fcFloat32_fc_fc) { + uint32_t input_n = 6; + uint32_t weight_n = 6; + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + + util::TensorWrapper input({1, input_n}); + util::TensorWrapper weight({weight_n, input_n}); + util::TensorWrapper bias({weight_n}); + util::TensorWrapper output({1, weight_n}); + + input.initValue([&](uint32_t h, uint32_t w) { + // not use h because h = 0. + return (float)w; + }); + + weight.initValue([&](uint32_t h, uint32_t w) { + uint32_t H = weight_n; + uint32_t W = input_n; + + return (float)(h*W + w); + }); + + bias.initValue([](uint32_t w) { + return 0.f; + }); + + output.initValue([](uint32_t h, uint32_t w) { + return 0.f; + }); + + bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(), + weight.ptr<float>(), weight.shape(), + bias.ptr<float>(), bias.shape(), + activation, + output.ptr<float>(), output.shape()); + + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1, weight_n}); + expected.initValue({ + 55.f, + 145.f, + 235.f, + 325.f, + 415.f, + 505.f, + }); + + EXPECT_EQ(output, expected); +} + +ACL_TEST(KernelACL_TC, fcFloat32_inceptionv3) { + + uint32_t input_c = 2048; + uint32_t weight_n = 1008; + + util::TensorWrapper input({1,1,1,input_c}); + util::TensorWrapper weight({weight_n,input_c}); + util::TensorWrapper bias({weight_n}); + util::TensorWrapper output({1, weight_n}); + + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + + input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.f; + }); + weight.initValue([&](uint32_t h, uint32_t w) { + return (float)h; + }); + bias.initValue([](uint32_t w) { + return 0.f; + }); + output.initValue([](uint32_t h, uint32_t w) { + return 0.f; + }); + + bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(), + weight.ptr<float>(), weight.shape(), + bias.ptr<float>(), bias.shape(), + activation, + output.ptr<float>(), output.shape()); + + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1, 
weight_n}); + expected.initValue([&](uint32_t h, uint32_t w) { + return w*input_c; + }); + + EXPECT_EQ(output, expected); +} + diff --git a/libs/kernel/acl/src/IO_accessor.cpp b/libs/kernel/acl/src/IO_accessor.cpp new file mode 100644 index 000000000..410fb8ea5 --- /dev/null +++ b/libs/kernel/acl/src/IO_accessor.cpp @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IO_accessor.h" + +#include <cassert> + +namespace nnfw { +namespace kernel { +namespace acl { + +InputAccessor::InputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape) + : _inputData(inputData) + , _inputShape(inputShape) +{ +} + +MatrixInputAccessor::MatrixInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape) + : _inputData(inputData) + , _inputShape(inputShape) +{ +} + +VectorInputAccessor::VectorInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape) + : _inputData(inputData) + , _inputShape(inputShape) +{ +} + +WeightAccessor::WeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape) + : _filterData(filterData) + , _filterShape(filterShape) +{ +} + +MatrixWeightAccessor::MatrixWeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape) + : _filterData(filterData) + , _filterShape(filterShape) +{ +} + +BiasAccessor::BiasAccessor(const float* biasData, const nnfw::rt::Shape& biasShape) + : 
_biasData(biasData) + , _biasShape(biasShape) +{ +} + +OutputAccessor::OutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape) + : _outputData(outputData) + , _outputShape(outputShape) +{ +} + +MatrixOutputAccessor::MatrixOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape) + : _outputData(outputData) + , _outputShape(outputShape) +{ +} + +VectorOutputAccessor::VectorOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape) + : _outputData(outputData) + , _outputShape(outputShape) +{ +} + +static uint32_t getOffsetNCHW(const nnfw::rt::Shape& shape, const arm_compute::Coordinates& id) +{ + // get offset for ACL(NCHW) from data of NNAPI(NHWC) + uint32_t num = getSizeOfDimension(shape, 0); + uint32_t height = getSizeOfDimension(shape, 1); + uint32_t width = getSizeOfDimension(shape, 2); + uint32_t chann = getSizeOfDimension(shape, 3); + uint32_t stride = 1; + uint32_t offset = 0; + uint32_t numdim = id.num_dimensions(); + offset += numdim > 0 ? id[0] * stride : 0; stride *= width; + offset += numdim > 1 ? id[1] * stride : 0; stride *= height; + offset += numdim > 2 ? id[2] * stride : 0; stride *= chann; + offset += numdim > 3 ? id[3] * stride : 0; stride *= num; + return offset; +} + +static uint32_t getElementOffset(const nnfw::rt::Shape& shape, + uint32_t ch, uint32_t row, uint32_t col) +{ + assert(getSizeOfDimension(shape, 0) == 1); + assert(shape.dimensions.size() == 4); + + // TODO Optimize this! + const uint32_t W = getSizeOfDimension(shape, 2); + const uint32_t C = getSizeOfDimension(shape, 3); + + int offset = 0; + + // NNAPI uses NHWC ordering + offset += row * W * C; + offset += col * C; + offset += ch; + + return offset; +} + +static uint32_t getElementOffset(const nnfw::rt::Shape& shape, + uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) +{ + assert(shape.dimensions.size() == 4); + + // TODO Optimize this! 
+ const uint32_t H = getSizeOfDimension(shape, 1); + const uint32_t W = getSizeOfDimension(shape, 2); + const uint32_t C = getSizeOfDimension(shape, 3); + + int offset = 0; + + // NNAPI uses NHWC ordering + offset += nth * H * W * C; + offset += row * W * C; + offset += col * C; + offset += ch; + + return offset; +} + +bool InputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + const uint32_t ch = id[2]; + const uint32_t row = id[1]; + const uint32_t col = id[0]; + + uint32_t offset = getElementOffset(_inputShape, ch, row, col); + + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_inputData + offset); + }); + return true; +} + +bool MatrixInputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + assert(tensor.info()->tensor_shape().num_dimensions() <= 2); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + const auto row = id[1]; + const auto col = id[0]; + const auto W = tensor.info()->tensor_shape().x(); + + const auto offset = row * W + col; + + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_inputData + offset); + }); + return true; +} + +bool VectorInputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + assert(tensor.info()->tensor_shape().num_dimensions() == 1); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + uint32_t offset = id[0]; + + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_inputData + offset); + }); + return true; +} + +bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + 
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + const uint32_t nth = id[3]; + const uint32_t ch = id[2]; + const uint32_t row = id[1]; + const uint32_t col = id[0]; + + uint32_t offset = getElementOffset(_filterShape, nth, ch, row, col); + + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_filterData + offset); + }); + return true; +} + +bool MatrixWeightAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + assert(tensor.info()->tensor_shape().num_dimensions() <= 2); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + const auto row = id[1]; + const auto col = id[0]; + const auto W = tensor.info()->tensor_shape().x(); + + uint32_t offset = row * W + col; + + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_filterData + offset); + }); + return true; +} + +bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + uint32_t offset = getOffsetNCHW(_biasShape, id); + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_biasData + offset); + }); + return true; +} + +bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + const uint32_t ch = id[2]; + const uint32_t row = id[1]; + const uint32_t col = id[0]; + + uint32_t offset = getElementOffset(_outputShape, ch, row, col); + + *(_outputData + offset) = + *reinterpret_cast<float *>(tensor.ptr_to_element(id)); + }); + return false; // end the network +} + +bool VectorOutputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + 
window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + assert(tensor.info()->tensor_shape().num_dimensions() == 1); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + const uint32_t x = id[0]; + + uint32_t offset = x; + + *(_outputData + offset) = + *reinterpret_cast<float *>(tensor.ptr_to_element(id)); + }); + return false; // end the network +} + +bool MatrixOutputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + assert(tensor.info()->tensor_shape().num_dimensions() <= 2); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + const auto row = id[1]; + const auto col = id[0]; + const auto W = tensor.info()->tensor_shape().x(); + + const auto offset = row * W + col; + + *(_outputData + offset) = + *reinterpret_cast<float *>(tensor.ptr_to_element(id)); + }); + return false; // end the network +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/IO_accessor.h b/libs/kernel/acl/src/IO_accessor.h new file mode 100644 index 000000000..e7670f15c --- /dev/null +++ b/libs/kernel/acl/src/IO_accessor.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#ifndef __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
#define __NNFW_KERNEL_ACL_IO_ACCESSOR_H__

#include <arm_compute/graph/ITensorAccessor.h>
#include <arm_compute/runtime/CL/CLFunctions.h>
#include <arm_compute/runtime/NEON/NEFunctions.h>

#include <OperationsUtils.h> // for nnfw::rt::Shape

namespace nnfw {
namespace kernel {
namespace acl {

// Accessors that move float data between NNAPI buffers and ARM Compute tensors.
//
// Lifetime note for every class below: the shape is held as a *reference* and
// the data as a raw pointer, so both must outlive the accessor. Do not
// construct an accessor from a temporary Shape.

// Copies a feature map from an NNAPI buffer into a tensor. access_tensor()
// reads each coordinate as (col, row, ch) and returns true.
class InputAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  InputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
  InputAccessor(InputAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  const float* _inputData;            // source buffer (not owned)
  const nnfw::rt::Shape& _inputShape; // shape used to compute buffer offsets (not owned)
};

// Copies a row-major matrix (tensor of at most two dimensions) into a tensor.
// access_tensor() indexes the buffer as row * width + col, taking the width
// from the tensor, and returns true.
class MatrixInputAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  MatrixInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
  MatrixInputAccessor(MatrixInputAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  const float* _inputData;            // source buffer (not owned)
  const nnfw::rt::Shape& _inputShape; // not read by access_tensor(); kept for a uniform constructor
};

// Copies a one-dimensional buffer into a one-dimensional tensor, element by
// element. access_tensor() returns true.
class VectorInputAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  VectorInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
  VectorInputAccessor(VectorInputAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  const float* _inputData;            // source buffer (not owned)
  const nnfw::rt::Shape& _inputShape; // not read by access_tensor(); kept for a uniform constructor
};

// Copies 4-D convolution filters into a tensor. access_tensor() reads each
// coordinate as (col, row, ch, nth) and returns true.
class WeightAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  WeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape);
  WeightAccessor(WeightAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  const float* _filterData;            // source buffer (not owned)
  const nnfw::rt::Shape& _filterShape; // shape used to compute buffer offsets (not owned)
};

// Copies a row-major weight matrix (tensor of at most two dimensions) into a
// tensor. access_tensor() indexes the buffer as row * width + col and returns true.
class MatrixWeightAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  MatrixWeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape);
  MatrixWeightAccessor(MatrixWeightAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  const float* _filterData;            // source buffer (not owned)
  const nnfw::rt::Shape& _filterShape; // not read by access_tensor(); kept for a uniform constructor
};

// Copies bias values into a tensor. access_tensor() obtains each buffer index
// from getOffsetNCHW() and returns true.
class BiasAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  BiasAccessor(const float* biasData, const nnfw::rt::Shape& biasShape);
  BiasAccessor(BiasAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  const float* _biasData;            // source buffer (not owned)
  const nnfw::rt::Shape& _biasShape; // shape used to compute buffer offsets (not owned)
};

// Copies a feature map out of a tensor into an NNAPI buffer. access_tensor()
// reads each coordinate as (col, row, ch) and returns false ("end the network").
class OutputAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  OutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
  OutputAccessor(OutputAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  float* _outputData;                  // destination buffer (not owned)
  const nnfw::rt::Shape& _outputShape; // shape used to compute buffer offsets (not owned)
};

// Copies a tensor of at most two dimensions out into a row-major buffer.
// access_tensor() indexes the buffer as row * width + col and returns false.
class MatrixOutputAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  MatrixOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
  MatrixOutputAccessor(MatrixOutputAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  float* _outputData;                  // destination buffer (not owned)
  const nnfw::rt::Shape& _outputShape; // not read by access_tensor(); kept for a uniform constructor
};

// Copies a one-dimensional tensor out into a buffer, element by element.
// access_tensor() returns false.
class VectorOutputAccessor : public arm_compute::graph::ITensorAccessor
{
public:
  VectorOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
  VectorOutputAccessor(VectorOutputAccessor&&) = default;

  // Inherited methods overridden:
  bool access_tensor(arm_compute::ITensor& tensor) override;

private:
  float* _outputData;                  // destination buffer (not owned)
  const nnfw::rt::Shape& _outputShape; // not read by access_tensor(); kept for a uniform constructor
};

// TensorAccess<AccessorType>(tensor, data, shape)
//
// Builds a temporary AccessorType from (data, shape) and runs its
// access_tensor() on `tensor`. The accessor's return value is discarded.
// Four overloads exist: CLTensor vs. Tensor, each for read-only (const float*)
// and writable (float*) buffers.

// CL tensor, read-only buffer: the accessor runs between tensor.map() and
// tensor.unmap().
template<typename AccessorType>
inline void TensorAccess(arm_compute::CLTensor& tensor, const float* data,
                         const nnfw::rt::Shape& shape)
{
  tensor.map();
  AccessorType accessor(data, shape);
  accessor.access_tensor(tensor);
  tensor.unmap();
}

// CL tensor, writable buffer: same map()/unmap() bracketing as above.
template<typename AccessorType>
inline void TensorAccess(arm_compute::CLTensor& tensor, float* data,
                         const nnfw::rt::Shape& shape)
{
  tensor.map();
  AccessorType accessor(data, shape);
  accessor.access_tensor(tensor);
  tensor.unmap();
}

// Runtime (non-CL) tensor, read-only buffer: no map()/unmap() calls are made.
template<typename AccessorType>
inline void TensorAccess(arm_compute::Tensor& tensor, const float* data,
                         const nnfw::rt::Shape& shape)
{
  AccessorType accessor(data, shape);
  accessor.access_tensor(tensor);
}

// Runtime (non-CL) tensor, writable buffer: no map()/unmap() calls are made.
template<typename AccessorType>
inline void TensorAccess(arm_compute::Tensor& tensor, float* data,
                         const nnfw::rt::Shape& shape)
{
  AccessorType accessor(data, shape);
  accessor.access_tensor(tensor);
}

} // namespace acl
} // namespace kernel
} // namespace nnfw

#endif // __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
+ */ + +#include <arm_compute/runtime/CL/CLScheduler.h> +#include <kernel/acl/nnfw_kernel_acl.h> + +namespace nnfw { +namespace kernel { +namespace acl { + +// This will do one time initialization but can be called multiple times +void Initialize(void) +{ + arm_compute::CLScheduler::get().default_init(); +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/NEUniqueTensor.h b/libs/kernel/acl/src/NEUniqueTensor.h new file mode 100644 index 000000000..34412f9e3 --- /dev/null +++ b/libs/kernel/acl/src/NEUniqueTensor.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__ +#define __NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__ + +#include <arm_compute/runtime/Tensor.h> + +namespace nnfw { +namespace kernel { +namespace acl { + +// TODO: find a way to merge CLUniqueTensor and NEUniqueTensor. 
+class NEUniqueTensor +{ +public: + NEUniqueTensor(const ::arm_compute::TensorInfo &info) + { + _tensor.allocator()->init(info); + } + +public: + // Both copy and move are not allowed + NEUniqueTensor(const NEUniqueTensor &) = delete; + NEUniqueTensor(NEUniqueTensor &&) = delete; + +public: + ~NEUniqueTensor() + { + _tensor.allocator()->free(); + } + +public: + void allocate() + { + _tensor.allocator()->allocate(); + } + +public: + ::arm_compute::Tensor &ref(void) { return _tensor; } + ::arm_compute::Tensor *ptr(void) { return &_tensor; } + +private: + ::arm_compute::Tensor _tensor; +}; + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif //__NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__ diff --git a/libs/kernel/acl/src/Reshape.h b/libs/kernel/acl/src/Reshape.h new file mode 100644 index 000000000..ebd82477d --- /dev/null +++ b/libs/kernel/acl/src/Reshape.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__ +#define __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__ +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +// TODO: fix include path in CMakeFiles +#include "IO_accessor.h" +#include "shape.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +namespace common { + +typedef std::function<void (void)> sync_scheduler_f; + +template<class TensorT, class LayerT> +bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape, + void* outputData, const nnfw::rt::Shape& outputShape, + sync_scheduler_f sync_scheduler) { + + auto input_shape = util::fromNNShape(inputShape); + auto output_shape = util::fromNNShape(outputShape); + + TensorT input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + TensorT output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + LayerT l; + + l.configure(input.ptr(), output.ptr()); + + input.allocate(); + output.allocate(); + + TensorAccess<InputAccessor>(input.ref(), (float*)inputData, inputShape); + + l.run(); + + sync_scheduler(); + + TensorAccess<OutputAccessor>(output.ref(), (float*)outputData, outputShape); + + return true; +} + +} // namespace common + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__ diff --git a/libs/kernel/acl/src/Reshape.test.h b/libs/kernel/acl/src/Reshape.test.h new file mode 100644 index 000000000..a96a896a6 --- /dev/null +++ b/libs/kernel/acl/src/Reshape.test.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <kernel/acl/Reshape.h> + +// TODO: fix include path in CMakeFiles +#include "util.h" + +#ifndef ACL_TEST +#error "ACL_TEST should be defined first!" +#endif // ACL_TEST + +#ifndef ACL_CORE_FUNC_NAME +#error "ACL_CORE_FUNC_NAME should be defined first!" +#endif // ACL_CORE_FUNC_NAME + +using namespace nnfw::kernel::acl; + +ACL_TEST(KernelACL_TC, reshape_1) { + const nnfw::rt::Shape inputShape = {OperandType::FLOAT32, {1,1,9,1}, 1.0, 0}; + float inputData[9] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float outputData[9] = {0}; + + bool bret = ACL_CORE_FUNC_NAME(inputData, inputShape, + outputData, outputShape); + + EXPECT_EQ(bret, true); + + float expectData[9] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); + +} diff --git a/libs/kernel/acl/src/cl/Concatenation.cpp b/libs/kernel/acl/src/cl/Concatenation.cpp new file mode 100644 index 000000000..9376006ca --- /dev/null +++ b/libs/kernel/acl/src/cl/Concatenation.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +#include <cassert> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs, + const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + if (axis != 3) + { + assert("Only support axis=3 for ACL" && 0); + return false; + } + assert(inputDataPtrs.size() == inputShapes.size()); + + std::vector<arm_compute::CLTensor*> inputPtrs; + std::vector<arm_compute::ICLTensor*> inputIptrs; + arm_compute::CLTensor output; + + // init Tensors + std::vector<nnfw::rt::Shape>::const_iterator it_inputShape = inputShapes.begin(); + for (auto inputData : inputDataPtrs) + { + const nnfw::rt::Shape& inputShape = *it_inputShape; + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::CLTensor* inputPtr = new arm_compute::CLTensor(); + + inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + inputPtrs.push_back(inputPtr); + inputIptrs.push_back(inputPtr); + + it_inputShape++; + } + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + // prepare ACL Concatenate and configure tensors + auto concat = 
std::make_shared<arm_compute::CLDepthConcatenateLayer>(); + concat->configure(inputIptrs, &output); + + // allocate Tensors + it_inputShape = inputShapes.begin(); + std::vector<const float*>::const_iterator it_inputData = inputDataPtrs.begin(); + for (auto inputPtr : inputPtrs) + { + inputPtr->allocator()->allocate(); + + const float* inputData = *it_inputData; + const nnfw::rt::Shape& inputShape = *it_inputShape; + + TensorAccess<InputAccessor>(*inputPtr, inputData, inputShape); + + it_inputShape++; + it_inputData++; + } + output.allocator()->allocate(); + + // run + concat->run(); + arm_compute::CLScheduler::get().sync(); + + // get output + TensorAccess<OutputAccessor>(output, outputData, outputShape); + + // cleanup + for (auto inputPtr : inputPtrs) + { + inputPtr->allocator()->free(); + delete inputPtr; + } + output.allocator()->free(); + + return true; +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/cl/Concatenation.test.cpp b/libs/kernel/acl/src/cl/Concatenation.test.cpp new file mode 100644 index 000000000..b2c5a5891 --- /dev/null +++ b/libs/kernel/acl/src/cl/Concatenation.test.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <kernel/acl/Concatenation.h> + +// TODO: fix include path in CMakeFiles +#include "../util.h" + +using namespace nnfw::kernel::acl; + +TEST(KernelACL_TC, concatFloat32_1) +{ + float inputData_1[6] = { + 1, 2, 3, 4, 5, 6 // [ [ [1],[2],[3] ], [ [4],[5],[6] ] ] + }; + float inputData_2[6] = { + 7, 8, 9, 10, 11, 12 // [ [ [7],[8],[9] ], [ [10],[11],[12] ] ] + }; + const nnfw::rt::Shape inputShape_1 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 }; + const nnfw::rt::Shape inputShape_2 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 }; + std::vector<const float*> inputDataPtrs; + std::vector<nnfw::rt::Shape> inputShapes; + float outputData[12]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,3,2}, 1.0, 0 }; + bool bret; + + inputDataPtrs.push_back(inputData_1); + inputDataPtrs.push_back(inputData_2); + inputShapes.push_back(inputShape_1); + inputShapes.push_back(inputShape_2); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = concatenationFloat32(inputDataPtrs, inputShapes, 3, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectNCHW[] = { + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12 + }; + float expectData[12]; // [ [ [1,7],[2,8],[3,9] ], [ [4,10],[5,11],[6,12] ] ] + util::NCHW2NHWC(expectNCHW, expectData, outputShape); + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} diff --git a/libs/kernel/acl/src/cl/Conv2D.cpp b/libs/kernel/acl/src/cl/Conv2D.cpp new file mode 100644 index 000000000..4783bdc1d --- /dev/null +++ b/libs/kernel/acl/src/cl/Conv2D.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <NeuralNetworks.h> + +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +#include <util/environment.h> + +#include "../IO_accessor.h" +#include "../util.h" +#include "../shape.h" +#include "../CLUniqueTensor.h" +#include "../support.h" + +#include "util/feature/TextFormatter.h" + +#include "support/nnapi/feature/Reader.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +static int verbose = 0; + +bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* filterData, const nnfw::rt::Shape& filterShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape); + arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, 
arm_compute::Format::F32)); + CLUniqueTensor bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32)); + CLUniqueTensor filter(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32)); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + auto conv_f = std::make_shared<arm_compute::CLConvolutionLayer>(); + + conv_f->configure(input.ptr(), filter.ptr(), bias.ptr(), output.ptr(), conv_info); + + fns.emplace_back(conv_f); + + util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns); + + input.allocate(); + output.allocate(); + bias.allocate(); + filter.allocate(); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape); + TensorAccess<WeightAccessor>(filter.ref(), filterData, filterShape); + + nnfw::util::env::IntAccessor("CONV2D_VERBOSE").access(verbose); + if (verbose) + { + input.ref().map(); + auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape); + nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData}; + nnfw::support::acl::feature::Reader<float> acl_ifm_reader{input.ptr()}; + + std::cout << "NNAPI IFM:" << std::endl; + std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl; + + std::cout << "ARM Compute IFM:" << std::endl; + std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl; + input.ref().unmap(); + } + + for (const auto &fn : fns) + { + fn->run(); + } + + arm_compute::CLScheduler::get().sync(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/cl/Conv2D.test.cpp b/libs/kernel/acl/src/cl/Conv2D.test.cpp new file mode 100644 index 000000000..e34cdeea5 --- /dev/null +++ b/libs/kernel/acl/src/cl/Conv2D.test.cpp @@ -0,0 +1,202 @@ +/* + * 
Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <kernel/acl/Conv2D.h> + +// TODO: fix include path in CMakeFiles +#include "../util.h" + +using namespace nnfw::kernel::acl; + +TEST(KernelACL_TC, convFloat32_3x3to1x1) +{ + float inputData[9]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { 1.0 }; + const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + 
stride_width, stride_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 10.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, convFloat32_3x3to3x3) +{ + float inputData[9]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { 1.0 }; + const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 }; + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float outputData[9]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { + 5.0f, 7.0f, 5.0f, + 7.0f, 10.0f, 7.0f, + 5.0f, 7.0f, 5.0f + }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, convFloat32_3x3to3x3_RELU) +{ + float inputData[9]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { -5.0f }; + const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 }; + 
int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float outputData[9]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = + { + 0.0f, 1.0f, 0.0f, + 1.0f, 4.0f, 1.0f, + 0.0f, 1.0f, 0.0f + }; + + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, convFloat32_3x5to3x3) +{ + float inputData[15] = { + 1,2,3,4,5, + 6,7,8,9,10, + 11,12,13,14,15 + }; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,5,1}, 1.0, 0 }; + float filterData[18] = { + 1,1,1, 1,1,1, 1,1,1, + 2,2,2, 2,2,2, 2,2,2 + }; + const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {2,3,3,1}, 1.0, 0 }; + float biasData[2] = { 1.0, 1.0 }; + const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {2}, 1.0, 0 }; + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float outputData[30]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,5,2}, 1.0, 0 }; + bool bret; + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = convFloat32(inputData, 
inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectNCHW[] = { + 17.0f, 28.0f, 34.0f, 40.0f, 29.0f, + 40.0f, 64.0f, 73.0f, 82.0f, 58.0f, + 37.0f, 58.0f, 64.0f, 70.0f, 49.0f, + + 33.0f, 55.0f, 67.0f, 79.0f, 57.0f, + 79.0f, 127.0f, 145.0f, 163.0f, 115.0f, + 73.0f, 115.0f, 127.0f, 139.0f, 97.0f + }; + float expectData[30]; + util::NCHW2NHWC(expectNCHW, expectData, outputShape); + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} diff --git a/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp b/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp new file mode 100644 index 000000000..7593a99f4 --- /dev/null +++ b/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +#include <cassert> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" +#include "../CLUniqueTensor.h" +#include "../DepthwiseConv2D.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +static void sync_scheduler() { + arm_compute::CLScheduler::get().sync(); +} + +bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* filterData, const nnfw::rt::Shape& filterShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t depth_multiplier, int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) { + return common::depthwiseConvFloat32<CLUniqueTensor, arm_compute::CLDepthwiseConvolutionLayer, + arm_compute::CLActivationLayer>(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + depth_multiplier, activation, + outputData, outputShape, + sync_scheduler); +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw + diff --git a/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp b/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp new file mode 100644 index 000000000..695563383 --- /dev/null +++ b/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define ACL_CORE_FUNC_NAME depthwiseConvFloat32 +#define ACL_TEST(tc, t) TEST(tc, cl_##t) + +#include "../DepthwiseConv2D.test.h" diff --git a/libs/kernel/acl/src/cl/FullyConnected.cpp b/libs/kernel/acl/src/cl/FullyConnected.cpp new file mode 100644 index 000000000..7513355ab --- /dev/null +++ b/libs/kernel/acl/src/cl/FullyConnected.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +#include <cassert> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" +#include "../CLUniqueTensor.h" +#include "../FullyConnected.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +void sync_scheduler() { + arm_compute::CLScheduler::get().sync(); +} + +bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* weightsData, const nnfw::rt::Shape& weightsShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) { + return common::fullyConnectedFloat32<CLUniqueTensor, arm_compute::CLFullyConnectedLayer, + arm_compute::CLActivationLayer>(inputData, inputShape, + weightsData, weightsShape, + biasData, biasShape, + activation, + outputData, outputShape, + sync_scheduler); +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/cl/FullyConnected.test.cpp b/libs/kernel/acl/src/cl/FullyConnected.test.cpp new file mode 100644 index 000000000..b1f5a095f --- /dev/null +++ b/libs/kernel/acl/src/cl/FullyConnected.test.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define ACL_CORE_FUNC_NAME fullyConnectedFloat32 +#define ACL_TEST(tc, t) TEST(tc, cl_##t) + +#include "../FullyConnected.test.h" diff --git a/libs/kernel/acl/src/cl/Pooling.cpp b/libs/kernel/acl/src/cl/Pooling.cpp new file mode 100644 index 000000000..e22eacccc --- /dev/null +++ b/libs/kernel/acl/src/cl/Pooling.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include "../IO_accessor.h" +#include "../shape.h" +#include "../CLUniqueTensor.h" + +#include <cassert> + +namespace nnfw { +namespace kernel { +namespace acl { + +bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + 
arm_compute::DimensionRoundingType::FLOOR); + + arm_compute::PoolingLayerInfo maxpool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::MAX, + arm_compute::Size2D(filter_width,filter_height), + pad_info, false); + + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + auto pool_f = std::make_shared<arm_compute::CLPoolingLayer>(); + pool_f->configure(input.ptr(), output.ptr(), maxpool_info); + + fns.emplace_back(pool_f); + + input.allocate(); + output.allocate(); + + util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + arm_compute::CLScheduler::get().sync(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + arm_compute::PoolingLayerInfo pool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::AVG, + arm_compute::Size2D(filter_width,filter_height), + pad_info, true); + + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, 
arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + auto pool_f = std::make_shared<arm_compute::CLPoolingLayer>(); + pool_f->configure(input.ptr(), output.ptr(), pool_info); + + fns.emplace_back(pool_f); + + input.allocate(); + output.allocate(); + + util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + arm_compute::CLScheduler::get().sync(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/cl/Pooling.test.cpp b/libs/kernel/acl/src/cl/Pooling.test.cpp new file mode 100644 index 000000000..8112e7a45 --- /dev/null +++ b/libs/kernel/acl/src/cl/Pooling.test.cpp @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <arm_compute/core/Types.h> +#include <kernel/acl/Pooling.h> + +#include "../util.h" + +using namespace nnfw::kernel::acl; + +TEST(KernelACL_TC, maxPoolFloat32_3x3to1x1) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + float value = 1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bret = maxPoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 9.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, maxPoolFloat32_3x3to1x1_RELU) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + float value = -1.0f; + input.initValue([&value](uint32_t n, uint32_t c, 
uint32_t h, uint32_t w) { + return value--; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_RELU; + + bret = maxPoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 0.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, maxPoolFloat32_3x3to2x2) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 1; + int32_t padding_top = 0; + int32_t padding_bottom = 1; + int32_t stride_width = 2; + int32_t stride_height = 2; + int32_t filter_width = 2; + int32_t filter_height = 2; + + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 }; + bool bret; + + float value = 1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bret = maxPoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { + 5.0f, 6.0f, + 8.0f, 9.0f + }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, maxPoolFloat32_147x147to73x73) +{ + util::TensorWrapper input({1,147,147,64}); + util::TensorWrapper output({1,73,73,64}); + + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 
0; + int32_t stride_width = 2; + int32_t stride_height = 2; + int32_t filter_width = 3; + int32_t filter_height = 3; + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = maxPoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1,73,73,64}); + expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + EXPECT_EQ(output, expected); +} + +TEST(KernelACL_TC, maxPoolFloat32_71x71to35x35) +{ + util::TensorWrapper input({1,71,71,192}); + util::TensorWrapper output({1,35,35,192}); + + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 2; + int32_t stride_height = 2; + int32_t filter_width = 3; + int32_t filter_height = 3; + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = maxPoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1,35,35,192}); + expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + EXPECT_EQ(output, expected); +} + +TEST(KernelACL_TC, averagePoolFloat32_3x3to1x1) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = 
{ OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + float value = 1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bret = averagePoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 5.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, averagePoolFloat32_3x3to1x1_RELU) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + float value = 3.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value--; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_RELU; + + bret = averagePoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, 
filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 0.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, averagePoolFloat32_3x3to2x2) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 2; + int32_t filter_height = 2; + + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 }; + bool bret; + + float value = 1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bret = averagePoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { + 3.0f, 4.0f, + 6.0f, 7.0f + }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, averagePoolFloat32_3x3to3x3) +{ + std::vector<uint32_t> dims = {1,3,3,1}; + util::TensorWrapper input(dims); + util::TensorWrapper output(dims); + + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + int32_t value=1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + 
return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected(dims); + float v=2.5f; + expected.initValue([&v](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + v = v + 0.5f; + return v; + }); + + EXPECT_EQ(output, expected); +} + +TEST(KernelACL_TC, averagePoolFloat32_35x35to35x35) +{ + int32_t N=35; + std::vector<uint32_t> dims = {1,35,35,768}; + util::TensorWrapper input(dims); + util::TensorWrapper output(dims); + + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected(dims); + expected.initValue([&N](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + EXPECT_EQ(output, expected); +} + +TEST(KernelACL_TC, averagePoolFloat32_8x8to1x1) +{ + util::TensorWrapper input({1,8,8,2048}); + util::TensorWrapper output({1,1,1,2048}); + + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 2; + int32_t stride_height = 2; + int32_t filter_width = 8; + 
int32_t filter_height = 8; + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1,1,1,2048}); + expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + EXPECT_EQ(output, expected); +} diff --git a/libs/kernel/acl/src/cl/Reshape.cpp b/libs/kernel/acl/src/cl/Reshape.cpp new file mode 100644 index 000000000..e420ab92b --- /dev/null +++ b/libs/kernel/acl/src/cl/Reshape.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" +#include "../CLUniqueTensor.h" +#include "../Reshape.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +static void sync_scheduler() { + arm_compute::CLScheduler::get().sync(); +} + +bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape, + void* outputData, const nnfw::rt::Shape& outputShape) { + return common::reshapeGeneric<CLUniqueTensor, arm_compute::CLReshapeLayer> + (inputData, inputShape, outputData, outputShape, sync_scheduler); +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/cl/Reshape.test.cpp b/libs/kernel/acl/src/cl/Reshape.test.cpp new file mode 100644 index 000000000..db23a6d3d --- /dev/null +++ b/libs/kernel/acl/src/cl/Reshape.test.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define ACL_CORE_FUNC_NAME reshapeGeneric +#define ACL_TEST(tc, t) TEST(tc, cl_##t) + +#include "../Reshape.test.h" diff --git a/libs/kernel/acl/src/cl/Softmax.cpp b/libs/kernel/acl/src/cl/Softmax.cpp new file mode 100644 index 000000000..a628f05fe --- /dev/null +++ b/libs/kernel/acl/src/cl/Softmax.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <NeuralNetworks.h> + +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include "../IO_accessor.h" +#include "../shape.h" +#include "../CLUniqueTensor.h" +#include "../util.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +bool softmaxFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float beta, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + auto softmax_f = std::make_shared<arm_compute::CLSoftmaxLayer>(); + softmax_f->configure(input.ptr(), output.ptr(), beta); + + input.allocate(); + output.allocate(); + + if (inputShape.dimensions.size() == 4) + { + 
TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + + softmax_f->run(); + + arm_compute::CLScheduler::get().sync(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + } + else if (inputShape.dimensions.size() == 2) + { + TensorAccess<MatrixInputAccessor>(input.ref(), inputData, inputShape); + + softmax_f->run(); + + arm_compute::CLScheduler::get().sync(); + + TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape); + } + else + { + assert("undefined dimension of input" && 0); + return false; + } + + return true; +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/cl/Softmax.test.cpp b/libs/kernel/acl/src/cl/Softmax.test.cpp new file mode 100644 index 000000000..8ee8b41e2 --- /dev/null +++ b/libs/kernel/acl/src/cl/Softmax.test.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <arm_compute/core/Types.h> +#include <kernel/acl/Softmax.h> + +#include "../util.h" + +using namespace nnfw::kernel::acl; + +TEST(KernelACL_TC, softmaxFloat32_1xn) +{ + float inputData[4]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 }; + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 }; + const float beta = 1.0f; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, softmaxFloat32_4d) +{ + float inputData[4]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + const float beta = 1.0f; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, softmaxFloat32_1xn_seq) +{ + float inputData[4]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 }; + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 }; + const float beta = 1.0f; + bool bret; + + util::initData_Increasing(inputData, sizeof(inputData) / 
sizeof(inputData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972}; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, softmaxFloat32_4d_seq) +{ + float inputData[4]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + const float beta = 1.0f; + bool bret; + + util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972}; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} diff --git a/libs/kernel/acl/src/gtest_env.cpp b/libs/kernel/acl/src/gtest_env.cpp new file mode 100644 index 000000000..f6fc52f7a --- /dev/null +++ b/libs/kernel/acl/src/gtest_env.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> +#include <kernel/acl/nnfw_kernel_acl.h> + +class TestEnvironment : public ::testing::Environment +{ +public: + virtual ~TestEnvironment() = default; + + virtual void SetUp() + { + nnfw::kernel::acl::Initialize(); + } + + virtual void TearDown() + { + // DO NOTHING + } +}; + +static ::testing::Environment* const testingenv = + ::testing::AddGlobalTestEnvironment(new TestEnvironment); diff --git a/libs/kernel/acl/src/neon/Concatenation.cpp b/libs/kernel/acl/src/neon/Concatenation.cpp new file mode 100644 index 000000000..8738a9d12 --- /dev/null +++ b/libs/kernel/acl/src/neon/Concatenation.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +#include <cassert> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" + +namespace nnfw { +namespace kernel { +namespace acl { +namespace neon { + +bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs, + const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + if (axis != 3) + { + assert("Only support axis=3 for ACL" && 0); + return false; + } + assert(inputDataPtrs.size() == inputShapes.size()); + + std::vector<arm_compute::Tensor*> inputPtrs; + std::vector<arm_compute::ITensor*> inputIptrs; + arm_compute::Tensor output; + + // init Tensors + std::vector<nnfw::rt::Shape>::const_iterator it_inputShape = inputShapes.begin(); + for (auto inputData : inputDataPtrs) + { + const nnfw::rt::Shape& inputShape = *it_inputShape; + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::Tensor* inputPtr = new arm_compute::Tensor(); + + inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + inputPtrs.push_back(inputPtr); + inputIptrs.push_back(inputPtr); + + it_inputShape++; + } + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + // prepare ACL Concatenate and configure tensors + auto concat = std::make_shared<arm_compute::NEDepthConcatenateLayer>(); + concat->configure(inputIptrs, &output); + + // allocate Tensors + it_inputShape = inputShapes.begin(); + std::vector<const float*>::const_iterator it_inputData = inputDataPtrs.begin(); + for (auto inputPtr : inputPtrs) + { + inputPtr->allocator()->allocate(); + + const float* inputData = *it_inputData; + const nnfw::rt::Shape& inputShape = *it_inputShape; + + 
TensorAccess<InputAccessor>(*inputPtr, inputData, inputShape); + + it_inputShape++; + it_inputData++; + } + output.allocator()->allocate(); + + // run + concat->run(); + + // get output + TensorAccess<OutputAccessor>(output, outputData, outputShape); + + // cleanup + for (auto inputPtr : inputPtrs) + { + inputPtr->allocator()->free(); + delete inputPtr; + } + output.allocator()->free(); + + return true; +} + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/neon/Concatenation.test.cpp b/libs/kernel/acl/src/neon/Concatenation.test.cpp new file mode 100644 index 000000000..03b05bd24 --- /dev/null +++ b/libs/kernel/acl/src/neon/Concatenation.test.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <kernel/acl/Concatenation.h> + +// TODO: fix include path in CMakeFiles +#include "../util.h" + +using namespace nnfw::kernel::acl; + +TEST(KernelACL_TC, neon_concatFloat32_1) +{ + float inputData_1[6] = { + 1, 2, 3, 4, 5, 6 // [ [ [1],[2],[3] ], [ [4],[5],[6] ] ] + }; + float inputData_2[6] = { + 7, 8, 9, 10, 11, 12 // [ [ [7],[8],[9] ], [ [10],[11],[12] ] ] + }; + const nnfw::rt::Shape inputShape_1 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 }; + const nnfw::rt::Shape inputShape_2 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 }; + std::vector<const float*> inputDataPtrs; + std::vector<nnfw::rt::Shape> inputShapes; + float outputData[12]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,3,2}, 1.0, 0 }; + bool bret; + + inputDataPtrs.push_back(inputData_1); + inputDataPtrs.push_back(inputData_2); + inputShapes.push_back(inputShape_1); + inputShapes.push_back(inputShape_2); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::concatenationFloat32(inputDataPtrs, inputShapes, 3, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectNCHW[] = { + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12 + }; + float expectData[12]; // [ [ [1,7],[2,8],[3,9] ], [ [4,10],[5,11],[6,12] ] ] + util::NCHW2NHWC(expectNCHW, expectData, outputShape); + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} diff --git a/libs/kernel/acl/src/neon/Conv2D.cpp b/libs/kernel/acl/src/neon/Conv2D.cpp new file mode 100644 index 000000000..679ecfced --- /dev/null +++ b/libs/kernel/acl/src/neon/Conv2D.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <NeuralNetworks.h> + +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +#include <util/environment.h> + +#include "../IO_accessor.h" +#include "../util.h" +#include "../shape.h" +#include "../NEUniqueTensor.h" +#include "../support.h" + +#include "util/feature/TextFormatter.h" + +#include "support/nnapi/feature/Reader.h" + +namespace nnfw { +namespace kernel { +namespace acl { +namespace neon { + +static int verbose = 0; + +bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* filterData, const nnfw::rt::Shape& filterShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape); + arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + NEUniqueTensor 
output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + NEUniqueTensor bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32)); + NEUniqueTensor filter(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32)); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + auto conv_f = std::make_shared<arm_compute::NEConvolutionLayer>(); + + conv_f->configure(input.ptr(), filter.ptr(), bias.ptr(), output.ptr(), conv_info); + + fns.emplace_back(conv_f); + + util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns); + + input.allocate(); + output.allocate(); + bias.allocate(); + filter.allocate(); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape); + TensorAccess<WeightAccessor>(filter.ref(), filterData, filterShape); + + nnfw::util::env::IntAccessor("CONV2D_VERBOSE").access(verbose); + if (verbose) + { + auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape); + nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData}; + nnfw::support::acl::feature::Reader<float> acl_ifm_reader{ input.ptr() }; + + std::cout << "NNAPI IFM:" << std::endl; + std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl; + + std::cout << "ARM Compute IFM:" << std::endl; + std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl; + } + + for (const auto &fn : fns) + { + fn->run(); + } + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/neon/Conv2D.test.cpp b/libs/kernel/acl/src/neon/Conv2D.test.cpp new file mode 100644 index 000000000..6a3de1c43 --- /dev/null +++ b/libs/kernel/acl/src/neon/Conv2D.test.cpp @@ -0,0 +1,202 @@ +/* + * Copyright (c) 
2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <kernel/acl/Conv2D.h> + +// TODO: fix include path in CMakeFiles +#include "../util.h" + +using namespace nnfw::kernel::acl; + +TEST(KernelACL_TC, neon_convFloat32_3x3to1x1) +{ + float inputData[9]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { 1.0 }; + const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + 
stride_width, stride_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 10.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_convFloat32_3x3to3x3) +{ + float inputData[9]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { 1.0 }; + const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 }; + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float outputData[9]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { + 5.0f, 7.0f, 5.0f, + 7.0f, 10.0f, 7.0f, + 5.0f, 7.0f, 5.0f + }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_convFloat32_3x3to3x3_RELU) +{ + float inputData[9]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { -5.0f }; + const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, 
{1}, 1.0, 0 }; + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float outputData[9]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = + { + 0.0f, 1.0f, 0.0f, + 1.0f, 4.0f, 1.0f, + 0.0f, 1.0f, 0.0f + }; + + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_convFloat32_3x5to3x3) +{ + float inputData[15] = { + 1,2,3,4,5, + 6,7,8,9,10, + 11,12,13,14,15 + }; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,5,1}, 1.0, 0 }; + float filterData[18] = { + 1,1,1, 1,1,1, 1,1,1, + 2,2,2, 2,2,2, 2,2,2 + }; + const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {2,3,3,1}, 1.0, 0 }; + float biasData[2] = { 1.0, 1.0 }; + const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {2}, 1.0, 0 }; + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float outputData[30]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,5,2}, 1.0, 0 }; + bool bret; + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = 
neon::convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectNCHW[] = { + 17.0f, 28.0f, 34.0f, 40.0f, 29.0f, + 40.0f, 64.0f, 73.0f, 82.0f, 58.0f, + 37.0f, 58.0f, 64.0f, 70.0f, 49.0f, + + 33.0f, 55.0f, 67.0f, 79.0f, 57.0f, + 79.0f, 127.0f, 145.0f, 163.0f, 115.0f, + 73.0f, 115.0f, 127.0f, 139.0f, 97.0f + }; + float expectData[30]; + util::NCHW2NHWC(expectNCHW, expectData, outputShape); + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} diff --git a/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp b/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp new file mode 100644 index 000000000..bcf56c667 --- /dev/null +++ b/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include <arm_compute/runtime/NEON/NEScheduler.h> + +#include <cassert> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" +#include "../NEUniqueTensor.h" +#include "../DepthwiseConv2D.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +namespace neon { +static void sync_scheduler() { +} + +bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* filterData, const nnfw::rt::Shape& filterShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t depth_multiplier, int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) { + return common::depthwiseConvFloat32<NEUniqueTensor, arm_compute::NEDepthwiseConvolutionLayer, + arm_compute::NEActivationLayer>(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + depth_multiplier, activation, + outputData, outputShape, + sync_scheduler); +} + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp b/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp new file mode 100644 index 000000000..d729d538e --- /dev/null +++ b/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define ACL_CORE_FUNC_NAME neon::depthwiseConvFloat32 +#define ACL_TEST(tc, t) TEST(tc, neon_##t) + +#include "../DepthwiseConv2D.test.h" diff --git a/libs/kernel/acl/src/neon/FullyConnected.cpp b/libs/kernel/acl/src/neon/FullyConnected.cpp new file mode 100644 index 000000000..86229cbf2 --- /dev/null +++ b/libs/kernel/acl/src/neon/FullyConnected.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include <arm_compute/runtime/NEON/NEScheduler.h> + +#include <cassert> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" +#include "../NEUniqueTensor.h" +#include "../FullyConnected.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +namespace neon { + +void sync_scheduler() { +} + +bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* weightsData, const nnfw::rt::Shape& weightsShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) { + + return common::fullyConnectedFloat32<NEUniqueTensor, arm_compute::NEFullyConnectedLayer, + arm_compute::NEActivationLayer>(inputData, inputShape, + weightsData, weightsShape, + biasData, biasShape, + activation, + outputData, outputShape, + sync_scheduler); +} + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw + diff --git a/libs/kernel/acl/src/neon/FullyConnected.test.cpp b/libs/kernel/acl/src/neon/FullyConnected.test.cpp new file mode 100644 index 000000000..d4c95e4cb --- /dev/null +++ b/libs/kernel/acl/src/neon/FullyConnected.test.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define ACL_CORE_FUNC_NAME neon::fullyConnectedFloat32 +#define ACL_TEST(tc, t) TEST(tc, neon_##t) + +#include "../FullyConnected.test.h" + diff --git a/libs/kernel/acl/src/neon/Pooling.cpp b/libs/kernel/acl/src/neon/Pooling.cpp new file mode 100644 index 000000000..5c58ae0b5 --- /dev/null +++ b/libs/kernel/acl/src/neon/Pooling.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include "../IO_accessor.h" +#include "../shape.h" +#include "../NEUniqueTensor.h" + +#include <cassert> + +namespace nnfw { +namespace kernel { +namespace acl { +namespace neon { + +bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + arm_compute::PoolingLayerInfo maxpool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::MAX, + arm_compute::Size2D(filter_width,filter_height), + pad_info, false); + + NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + auto pool_f = std::make_shared<arm_compute::NEPoolingLayer>(); + pool_f->configure(input.ptr(), output.ptr(), maxpool_info); + + fns.emplace_back(pool_f); + + util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns); + + input.allocate(); + output.allocate(); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +bool averagePoolFloat32(const float* 
inputData, const nnfw::rt::Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + arm_compute::PoolingLayerInfo pool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::AVG, + arm_compute::Size2D(filter_width,filter_height), + pad_info, true); + + NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + auto pool_f = std::make_shared<arm_compute::NEPoolingLayer>(); + pool_f->configure(input.ptr(), output.ptr(), pool_info); + + fns.emplace_back(pool_f); + + util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns); + + input.allocate(); + output.allocate(); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/neon/Pooling.test.cpp b/libs/kernel/acl/src/neon/Pooling.test.cpp new file mode 100644 index 000000000..4e6593921 --- /dev/null +++ b/libs/kernel/acl/src/neon/Pooling.test.cpp @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <arm_compute/core/Types.h> +#include <kernel/acl/Pooling.h> + +#include "../util.h" + +using namespace nnfw::kernel::acl; + +TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to1x1) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + float value = 1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bret = neon::maxPoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 9.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + 
+TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to1x1_RELU) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + float value = -1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value--; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_RELU; + + bret = neon::maxPoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 0.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to2x2) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 1; + int32_t padding_top = 0; + int32_t padding_bottom = 1; + int32_t stride_width = 2; + int32_t stride_height = 2; + int32_t filter_width = 2; + int32_t filter_height = 2; + + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 }; + bool bret; + + float value = 1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bret = 
neon::maxPoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { + 5.0f, 6.0f, + 8.0f, 9.0f + }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_maxPoolFloat32_147x147to73x73) +{ + util::TensorWrapper input({1,147,147,64}); + util::TensorWrapper output({1,73,73,64}); + + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 2; + int32_t stride_height = 2; + int32_t filter_width = 3; + int32_t filter_height = 3; + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = neon::maxPoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1,73,73,64}); + expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + EXPECT_EQ(output, expected); +} + +TEST(KernelACL_TC, neon_maxPoolFloat32_71x71to35x35) +{ + util::TensorWrapper input({1,71,71,192}); + util::TensorWrapper output({1,35,35,192}); + + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 2; + int32_t stride_height = 2; + int32_t filter_width = 3; + int32_t filter_height = 3; + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t 
h, uint32_t w) { + return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = neon::maxPoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1,35,35,192}); + expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + EXPECT_EQ(output, expected); +} + +TEST(KernelACL_TC, neon_averagePoolFloat32_3x3to1x1) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + float value = 1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bret = neon::averagePoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 5.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_averagePoolFloat32_3x3to1x1_RELU) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + 
int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + float outputData[1]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + bool bret; + + float value = 3.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value--; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_RELU; + + bret = neon::averagePoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 0.0f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_averagePoolFloat32_3x3to2x2) +{ + util::TensorWrapper input({1,3,3,1}); + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 2; + int32_t filter_height = 2; + + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 }; + bool bret; + + float value = 1.0f; + input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return value++; + }); + + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bret = neon::averagePoolFloat32(input.ptr<float>(), inputShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { + 3.0f, 4.0f, + 
6.0f, 7.0f + }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_averagePoolFloat32_35x35to35x35) +{ + std::vector<uint32_t> dims = {1,35,35,192}; + util::TensorWrapper input(dims); + util::TensorWrapper output(dims); + + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t filter_width = 3; + int32_t filter_height = 3; + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = neon::averagePoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected(dims); + expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + EXPECT_EQ(output, expected); +} + +TEST(KernelACL_TC, neon_averagePoolFloat32_8x8to1x1) +{ + util::TensorWrapper input({1,8,8,2048}); + util::TensorWrapper output({1,1,1,2048}); + + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 2; + int32_t stride_height = 2; + int32_t filter_width = 8; + int32_t filter_height = 8; + + input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 0.f; + }); + + int32_t activation = ANEURALNETWORKS_FUSED_NONE; + + bool bret = neon::averagePoolFloat32(input.ptr<float>(), input.shape(), + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + 
filter_width, filter_height, + activation, + output.ptr<float>(), output.shape()); + EXPECT_EQ(bret, true); + + util::TensorWrapper expected({1,1,1,2048}); + expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) { + return 1.0f; + }); + + EXPECT_EQ(output, expected); +} diff --git a/libs/kernel/acl/src/neon/Reshape.cpp b/libs/kernel/acl/src/neon/Reshape.cpp new file mode 100644 index 000000000..cef84c7f3 --- /dev/null +++ b/libs/kernel/acl/src/neon/Reshape.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" +#include "../NEUniqueTensor.h" +#include "../Reshape.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +namespace neon { + +static void sync_scheduler() { + arm_compute::CLScheduler::get().sync(); +} + +bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape, + void* outputData, const nnfw::rt::Shape& outputShape) { + return common::reshapeGeneric<NEUniqueTensor, arm_compute::NEReshapeLayer> + (inputData, inputShape, outputData, outputShape, sync_scheduler); +} + +} // namespace neon + +} // namespace acl +} // namespace kernel +} // namespace nnfw + diff --git a/libs/kernel/acl/src/neon/Reshape.test.cpp b/libs/kernel/acl/src/neon/Reshape.test.cpp new file mode 100644 index 000000000..9aca45e7e --- /dev/null +++ b/libs/kernel/acl/src/neon/Reshape.test.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define ACL_CORE_FUNC_NAME neon::reshapeGeneric +#define ACL_TEST(tc, t) TEST(tc, neon_##t) + +#include "../Reshape.test.h" diff --git a/libs/kernel/acl/src/neon/Softmax.cpp b/libs/kernel/acl/src/neon/Softmax.cpp new file mode 100644 index 000000000..79d614418 --- /dev/null +++ b/libs/kernel/acl/src/neon/Softmax.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <NeuralNetworks.h> + +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include "../IO_accessor.h" +#include "../shape.h" +#include "../util.h" +#include "../NEUniqueTensor.h" + +namespace nnfw { +namespace kernel { +namespace acl { +namespace neon { + +bool softmaxFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float beta, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + + NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + auto softmax_f = std::make_shared<arm_compute::NESoftmaxLayer>(); + softmax_f->configure(input.ptr(), output.ptr(), beta); + + input.allocate(); + output.allocate(); + + if 
(inputShape.dimensions.size() == 4) + { + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + + softmax_f->run(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + } + else if (inputShape.dimensions.size() == 2) + { + // Softmax comes with 1xN matrix and this is translated to N vector in arm_compute::TensorShape + TensorAccess<VectorInputAccessor>(input.ref(), inputData, inputShape); + + softmax_f->run(); + + TensorAccess<VectorOutputAccessor>(output.ref(), outputData, outputShape); + } + else + { + assert("undefined dimension of input" && 0); + return false; + } + + return true; +} + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/neon/Softmax.test.cpp b/libs/kernel/acl/src/neon/Softmax.test.cpp new file mode 100644 index 000000000..988f55078 --- /dev/null +++ b/libs/kernel/acl/src/neon/Softmax.test.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> +#include <OperationsUtils.h> +#include <kernel/acl/nnfw_kernel_acl.h> +#include <arm_compute/core/Types.h> +#include <kernel/acl/Softmax.h> + +#include "../util.h" + +using namespace nnfw::kernel::acl; + +TEST(KernelACL_TC, neon_softmaxFloat32_1xn) +{ + float inputData[4]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 }; + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 }; + const float beta = 1.0f; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::softmaxFloat32(inputData, inputShape, beta, outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_softmaxFloat32_4d) +{ + float inputData[4]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + const float beta = 1.0f; + bool bret; + + util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::softmaxFloat32(inputData, inputShape, beta, outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f }; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_softmaxFloat32_1xn_seq) +{ + float inputData[4]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + const float beta = 1.0f; + bool bret; + + 
util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::softmaxFloat32(inputData, inputShape, beta, outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972}; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} + +TEST(KernelACL_TC, neon_softmaxFloat32_4d_seq) +{ + float inputData[4]; + const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + float outputData[4]; + const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 }; + const float beta = 1.0f; + bool bret; + + util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + + bret = neon::softmaxFloat32(inputData, inputShape, beta, outputData, outputShape); + EXPECT_EQ(bret, true); + + float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972}; + bret = util::compareData(outputData, expectData, outputShape); + EXPECT_EQ(bret, true); +} diff --git a/libs/kernel/acl/src/shape.cpp b/libs/kernel/acl/src/shape.cpp new file mode 100644 index 000000000..3c976ae94 --- /dev/null +++ b/libs/kernel/acl/src/shape.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cassert> + +#include "shape.h" + +namespace nnfw { +namespace rt { + +// TODO remove from this source and use it from runtime +uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) { + if (dimensionIdx >= shape.dimensions.size()) { + // TODO, log the error + return 0; + } + return shape.dimensions[dimensionIdx]; +} + +} // namespace rt +} // namespace nnfw + +namespace nnfw { +namespace kernel { +namespace acl { +namespace util { + +arm_compute::TensorShape fromVectorNNShape(const nnfw::rt::Shape& shape) +{ + assert(shape.dimensions.size() == 1); + + const uint32_t len = nnfw::rt::getSizeOfDimension(shape, 0); + + return arm_compute::TensorShape(len); +} + +arm_compute::TensorShape fromMatrixNNShape(const nnfw::rt::Shape& shape) +{ + assert(shape.dimensions.size() == 2); + + const uint32_t n = nnfw::rt::getSizeOfDimension(shape, 0); + const uint32_t c = nnfw::rt::getSizeOfDimension(shape, 1); + + return arm_compute::TensorShape(c, n); +} + +arm_compute::TensorShape fromNNShape(const nnfw::rt::Shape& shape) +{ + if( shape.dimensions.size() == 1 ) + return fromVectorNNShape(shape); + else if( shape.dimensions.size() == 2 ) + return fromMatrixNNShape(shape); + + // TODO: need to treat 3D tensors. 
+ + assert(shape.dimensions.size() == 4); + + // NNAPI assumes the following ordering: + // + // dim(0) -> N + // dim(1) -> H + // dim(2) -> W + // dim(3) -> C + // + uint32_t c = nnfw::rt::getSizeOfDimension(shape, 3); + uint32_t h = nnfw::rt::getSizeOfDimension(shape, 1); + uint32_t w = nnfw::rt::getSizeOfDimension(shape, 2); + uint32_t n = nnfw::rt::getSizeOfDimension(shape, 0); + + return arm_compute::TensorShape(w, h, c, n); +} + +} // namespace util +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/shape.h b/libs/kernel/acl/src/shape.h new file mode 100644 index 000000000..902115ebd --- /dev/null +++ b/libs/kernel/acl/src/shape.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_KERNEL_ACL_SHAPE_H__ +#define __NNFW_KERNEL_ACL_SHAPE_H__ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include <arm_compute/runtime/IFunction.h> +#include <cassert> + +namespace nnfw { +namespace rt { + +// TODO remove from this source and use it from runtime +uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx); + +} // namespace rt +} // namespace nnfw + +namespace nnfw { +namespace kernel { +namespace acl { +namespace util { + +arm_compute::TensorShape fromVectorNNShape(const nnfw::rt::Shape& shape); +arm_compute::TensorShape fromNNShape(const nnfw::rt::Shape& shape); + +template<class TensorT, class ActT> +void insertFusedActivationLayer(TensorT& out, int activation, + std::vector<std::shared_ptr<arm_compute::IFunction>>& fns) { + auto relu_f = std::make_shared<ActT>(); + + switch(activation) { + case ANEURALNETWORKS_FUSED_NONE: + // DO NOTHING + return; + + case ANEURALNETWORKS_FUSED_RELU: + { + const arm_compute::ActivationLayerInfo relu_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU); + + // Do in-place update + relu_f->configure(out.ptr(), nullptr, relu_info); + } + break; + + case ANEURALNETWORKS_FUSED_RELU1: + { + const arm_compute::ActivationLayerInfo relu_info(arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 1.f); + + // Do in-place update + relu_f->configure(out.ptr(), nullptr, relu_info); + } + break; + + case ANEURALNETWORKS_FUSED_RELU6: + { + const arm_compute::ActivationLayerInfo relu_info(arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f); + + // Do in-place update + relu_f->configure(out.ptr(), nullptr, relu_info); + } + break; + + default: + assert("Undefined activation type." 
&& 0); + break; + } + + fns.emplace_back(relu_f); +} + +} // namespace util +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_SHAPE_H__ diff --git a/libs/kernel/acl/src/support.cpp b/libs/kernel/acl/src/support.cpp new file mode 100644 index 000000000..d04aef59e --- /dev/null +++ b/libs/kernel/acl/src/support.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "support.h" + +namespace nnfw +{ +namespace support +{ +namespace nnapi +{ +namespace feature +{ + +// TODO Extract this function as utility function +// NOTE It is not a good design to access nnfw::rt::Shape nnfw_support_nnapi lib +nnfw::util::feature::Shape asFeatureShape(const nnfw::rt::Shape& shape) +{ + // NNAPI assumes the following ordering: + // + // dim(0) -> N + // dim(1) -> H + // dim(2) -> W + // dim(3) -> C + // + int32_t c = nnfw::rt::getSizeOfDimension(shape, 3); + int32_t h = nnfw::rt::getSizeOfDimension(shape, 1); + int32_t w = nnfw::rt::getSizeOfDimension(shape, 2); + + assert(nnfw::rt::getSizeOfDimension(shape, 0) == 1); + + return nnfw::util::feature::Shape{c, h, w}; +} + +} // namespace feature +} // namespace nnapi +} // namespace support +} // namespace nnfw diff --git a/libs/kernel/acl/src/support.h b/libs/kernel/acl/src/support.h new file mode 100644 index 000000000..751d2c6cb --- /dev/null +++ b/libs/kernel/acl/src/support.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_KERNEL_SUPPORT_H_TEMPORARY__ +#define __NNFW_KERNEL_SUPPORT_H_TEMPORARY__ + +// NOTE these are not decided yet but need to be moved out from Conv2D +// to separate NEON implementation to it's folder +// TODO move to some folder where it should be + +#include <cassert> + +#include "util/feature/Shape.h" + +#include <OperationsUtils.h> + +namespace nnfw +{ +namespace support +{ +namespace nnapi +{ +namespace feature +{ + +// TODO Extract this function as utility function +// NOTE It is not a good design to access nnfw::rt::Shape nnfw_support_nnapi lib +nnfw::util::feature::Shape asFeatureShape(const nnfw::rt::Shape& shape); + +} // namespace feature +} // namespace nnapi +} // namespace support +} // namespace nnfw + +#include <arm_compute/core/ITensor.h> + +#include "util/feature/Reader.h" + +namespace nnfw +{ +namespace support +{ +namespace acl +{ +namespace feature +{ + +template<typename T> class Reader; + +template<> class Reader<float> final : public nnfw::util::feature::Reader<float> +{ +public: + Reader(arm_compute::ITensor *tensor) : _tensor{tensor} + { + assert(tensor->info()->data_type() == arm_compute::DataType::F32); + } + +public: + float at(uint32_t ch, uint32_t row, uint32_t col) const override + { + return *ptr_to_element(ch, row, col); + } + +private: + float *ptr_to_element(uint32_t ch, uint32_t row, uint32_t col) const + { + // ARM Compute uses CHW ordering + return reinterpret_cast<float *>(_tensor->ptr_to_element(arm_compute::Coordinates{col, row, ch})); + } + +private: + arm_compute::ITensor *_tensor; +}; + +} // namespace feature +} // namespace acl +} // namespace support +} // namespace nnfw + +#endif // __NNFW_KERNEL_SUPPORT_H_TEMPORARY__ diff --git a/libs/kernel/acl/src/util.cpp b/libs/kernel/acl/src/util.cpp new file mode 100644 index 000000000..7e5df534e --- /dev/null +++ b/libs/kernel/acl/src/util.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <util/fp32.h> + +#include "util.h" + +namespace nnfw { +namespace kernel { +namespace acl { +namespace util { + +void initData(float* data, int num, float value) +{ + for (int i = 0; i < num; i++) { + *(data + i) = value; + } +} + +void initData_Increasing(float* data, int num, float value) +{ + for (int i = 0; i < num; i++) { + *(data + i) = value; + value++; + } +} + +// compareData +// return true if result == expected with the shape info, +// otherwise false +bool compareData(const float* result, const float* expected, const nnfw::rt::Shape& shape) +{ + if (shape.dimensions.size() == 4) + { + // TODO fix indentation + uint32_t height = nnfw::rt::getSizeOfDimension(shape, 1); + uint32_t width = nnfw::rt::getSizeOfDimension(shape, 2); + uint32_t numitems = height * width; + for (int item = 0; item < numitems; item++) { + if (!::nnfw::util::fp32::epsilon_equal(*(result + item), *(expected + item), 1)) { + LOG(ERROR) << "compareData failed: result " << *(result + item) + << ", expected " << *(expected + item) + << ", diff " << ::nnfw::util::fp32::relative_diff(*(result + item), *(expected + item)) + << std::endl; + return false; + } + } + } + else if (shape.dimensions.size() == 2) + { + uint32_t height = nnfw::rt::getSizeOfDimension(shape, 0); + uint32_t width = nnfw::rt::getSizeOfDimension(shape, 1); + uint32_t numitems = height * width; + for (int item = 0; item < 
numitems; item++) { + if (!::nnfw::util::fp32::epsilon_equal(*(result + item), *(expected + item), 1)) { + LOG(ERROR) << "compareData failed: result " << *(result + item) + << ", expected " << *(expected + item) + << ", diff " << ::nnfw::util::fp32::relative_diff(*(result + item), *(expected + item)) + << std::endl; + return false; + } + } + } + else + { + // TODO: add a handler for rank 1 and 3 + LOG(ERROR) << "Unhandled shape: " << shape.dimensions.size() << std::endl; + } + return true; +} + +void NCHW2NHWC(const float* nchw, float* nhwc, const nnfw::rt::Shape& shape) +{ + uint32_t N = nnfw::rt::getSizeOfDimension(shape, 0); + uint32_t H = nnfw::rt::getSizeOfDimension(shape, 1); + uint32_t W = nnfw::rt::getSizeOfDimension(shape, 2); + uint32_t C = nnfw::rt::getSizeOfDimension(shape, 3); + + for (uint32_t n = 0; n < N; n++) { + for (uint32_t c = 0; c < C; c++) { + for (uint32_t h = 0; h < H; h++) { + for (uint32_t w = 0; w < W; w++) { + uint32_t soffset = w + (h * W) + (c * W * H) + (n * W * H * C); + uint32_t doffset = c + (w * C) + (h * C * W) + (n * C * W * H); + *(nhwc + doffset) = *(nchw + soffset); + } + } + } + } +} + +} // namespace util +} // namespace acl +} // namespace kernel +} // namespace nnfw diff --git a/libs/kernel/acl/src/util.h b/libs/kernel/acl/src/util.h new file mode 100644 index 000000000..48ed02783 --- /dev/null +++ b/libs/kernel/acl/src/util.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_KERNEL_ACL_UTIL_H__ +#define __NNFW_KERNEL_ACL_UTIL_H__ +#include <OperationsUtils.h> + +#include <cmath> +#include <cassert> +#include <functional> + +namespace nnfw { +namespace kernel { +namespace acl { +namespace util { + +// TODO: make a separate module. +class TensorWrapper { +public: + TensorWrapper(std::vector<uint32_t> dims, + OperandType type = OperandType::FLOAT32, + float scale = 1.0, + int32_t offset = 0) + :_shape{type, dims, scale, offset} + { + + // currently, we support only FLOAT32 for now. + assert( type == OperandType::FLOAT32); + + uint32_t size_bytes = sizeof(float); + + _num_elems = 1; + for( auto& d: dims ) { + _num_elems *= d; + } + + _data = new uint8_t[_num_elems * size_bytes]; + } + + ~TensorWrapper() { + delete [] _data; + } + + const nnfw::rt::Shape shape() const { + return _shape; + } + + uint32_t num_elems() const { return _num_elems; } + + template<class T> + T at(const uint32_t& idx) const { + return reinterpret_cast<T*>(_data)[idx]; + } + + template<class T> + T& at(const uint32_t& idx) { + return reinterpret_cast<T*>(_data)[idx]; + } + + template<class T> + T* ptr() { return reinterpret_cast<T*>(_data); } + + void initValue(float f) { + for( uint32_t i = 0; i < _num_elems; ++i ) { + at<float>(i) = f; + } + } + + typedef std::function<float(uint32_t n, uint32_t c, uint32_t h, uint32_t w)> funcInit4; + void initValue(funcInit4 f) { + assert(_shape.dimensions.size() == 4); + + int N = _shape.dimensions[0]; + int H = _shape.dimensions[1]; + int W = _shape.dimensions[2]; + int C = _shape.dimensions[3]; + + for(int n = 0; n < N; ++n) { + for(int h = 0; h < H; ++h) { + for(int w = 0; w < W; ++w) { + for(int c = 0; c < C; ++c) { + uint32_t offset = n*H*W*C + h*W*C + w*C + c; + at<float>(offset) = f(n,c,h,w); + } + } + } + } + } + + typedef std::function<float(uint32_t c, uint32_t h, uint32_t w)> funcInit3; + 
void initValue(funcInit3 f) { + assert(_shape.dimensions.size() == 3); + + int C = _shape.dimensions[0]; + int H = _shape.dimensions[1]; + int W = _shape.dimensions[2]; + + for(int h = 0; h < H; ++h) { + for(int w = 0; w < W; ++w) { + for(int c = 0; c < C; ++c) { + uint32_t offset = h*W*C + w*C + c; + at<float>(offset) = f(c,h,w); + } + } + } + } + + typedef std::function<float(uint32_t h, uint32_t w)> funcInit2; + void initValue(funcInit2 f) { + assert(_shape.dimensions.size() == 2); + + int H = _shape.dimensions[0]; + int W = _shape.dimensions[1]; + + for(int h = 0; h < H; ++h) { + for(int w = 0; w < W; ++w) { + uint32_t offset = h*W + w; + at<float>(offset) = f(h,w); + } + } + } + + typedef std::function<float(uint32_t w)> funcInit1; + void initValue(funcInit1 f) { + assert(_shape.dimensions.size() == 1); + + int W = _shape.dimensions[0]; + + for(int w = 0; w < W; ++w) { + uint32_t offset = w; + at<float>(offset) = f(w); + } + } + + void initValue(std::vector<float> v) { + assert(v.size() == _num_elems); + for( uint32_t i = 0; i < _num_elems; ++i ) { + at<float>(i) = v[i]; + } + } + + bool operator==(const TensorWrapper &t) const { + // compare the shape + assert(num_elems() == t.num_elems()); + assert(_shape.type == t.shape().type); + assert(_shape.scale == t.shape().scale); + assert(_shape.offset == t.shape().offset); + assert(_shape.dimensions == t.shape().dimensions); + + // currently, we support only FLOAT32. 
+ assert(_shape.type == OperandType::FLOAT32); + + for( uint32_t i = 0; i < _num_elems; ++i ) { + if( std::fabs(static_cast<float>(at<float>(i) - t.at<float>(i))) > 0.001f ) { + std::cout << "Comparing [" << i << "] " << at<float>(i) << "," << t.at<float>(i) << std::endl; + return false; + } + } + + return true; + } + +private: + nnfw::rt::Shape _shape; + uint32_t _num_elems; + uint8_t* _data; +}; + +void initData(float* data, int num, float value); +bool compareData(const float* result, const float* expected, const nnfw::rt::Shape& shape); +void initData_Increasing(float* data, int num, float value); + +void NCHW2NHWC(const float* nchw, float* nhwc, const nnfw::rt::Shape& shape); + +} // namespace util +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_UTIL_H__ |