diff options
Diffstat (limited to 'contrib/labs')
20 files changed, 2615 insertions, 0 deletions
diff --git a/contrib/labs/CMakeLists.txt b/contrib/labs/CMakeLists.txt new file mode 100644 index 000000000..57e28c11a --- /dev/null +++ b/contrib/labs/CMakeLists.txt @@ -0,0 +1,5 @@ +if(NOT BUILD_LABS) + return() +endif(NOT BUILD_LABS) + +add_subdirectories() diff --git a/contrib/labs/jniacl/CMakeLists.txt b/contrib/labs/jniacl/CMakeLists.txt new file mode 100644 index 000000000..f66127b84 --- /dev/null +++ b/contrib/labs/jniacl/CMakeLists.txt @@ -0,0 +1,18 @@ +# +# Simple Android JNI execution test of ACL +# + +if(NOT "${TARGET_OS}" STREQUAL "android") + return() +endif(NOT "${TARGET_OS}" STREQUAL "android") + +nnfw_find_package(ARMCompute REQUIRED) + +link_directories(${CMAKE_INSTALL_PREFIX}/lib) + +set(JNIACL_SRCS src/jniacl_main.cc + src/io_accessor.cc) + +add_library(jniacl_jni SHARED ${JNIACL_SRCS}) +target_include_directories(jniacl_jni PUBLIC ${TFLITE_JNI_INCLUDES} src) +target_link_libraries(jniacl_jni arm_compute_graph log) diff --git a/contrib/labs/jniacl/src/io_accessor.cc b/contrib/labs/jniacl/src/io_accessor.cc new file mode 100644 index 000000000..103660716 --- /dev/null +++ b/contrib/labs/jniacl/src/io_accessor.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "io_accessor.h" +#include <ostream> +#include <android/log.h> + +bool InputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + // Subtract the mean value from each channel + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = _test_input; + _test_input += _inc ? 1.0 : 0.0; + + __android_log_print(ANDROID_LOG_DEBUG, "LOG_TAG", "Input %d, %d = %lf\r\n", + id.y(), id.x(), *reinterpret_cast<float *>(tensor.ptr_to_element(id))); + }); + return true; +} + +bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + // Subtract the mean value from each channel + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + __android_log_print(ANDROID_LOG_DEBUG, "Output", "Input %d, %d = %lf\r\n", + id.y(), id.x(), *reinterpret_cast<float *>(tensor.ptr_to_element(id))); + }); + return false; // end the network +} + +bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + // Subtract the mean value from each channel + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = _test_weight; + _test_weight += _inc ? 1.0 : 0.0; + }); + return true; +} + +bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + // Subtract the mean value from each channel + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = 0.0; + }); + return true; +} diff --git a/contrib/labs/jniacl/src/io_accessor.h b/contrib/labs/jniacl/src/io_accessor.h new file mode 100644 index 000000000..4033020e0 --- /dev/null +++ b/contrib/labs/jniacl/src/io_accessor.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __IO_ACCESSOR_H__ +#define __IO_ACCESSOR_H__ + +#include <arm_compute/graph/ITensorAccessor.h> + +class InputAccessor : public arm_compute::graph::ITensorAccessor +{ +public: + InputAccessor(bool inc) : _inc(inc) { _test_input = 1.0; } + InputAccessor(InputAccessor&&) = default; + + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor& tensor) override; + +private: + bool _inc; + float _test_input; +}; + +class OutputAccessor : public arm_compute::graph::ITensorAccessor +{ +public: + OutputAccessor() = default; + OutputAccessor(OutputAccessor&&) = default; + + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor& tensor) override; +}; + +class WeightAccessor : public arm_compute::graph::ITensorAccessor +{ +public: + WeightAccessor(bool inc) : _inc(inc) { _test_weight = 1.0; } + WeightAccessor(WeightAccessor&&) = default; + + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor& tensor) override; + +private: + bool _inc; + float _test_weight; +}; + +class BiasAccessor : public arm_compute::graph::ITensorAccessor +{ +public: + BiasAccessor() = default; + BiasAccessor(BiasAccessor&&) = default; + + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor& tensor) override; +}; + +#endif // __IO_ACCESSOR_H__ diff --git a/contrib/labs/jniacl/src/jniacl_main.cc b/contrib/labs/jniacl/src/jniacl_main.cc new file mode 100644 index 000000000..515f28732 --- /dev/null +++ b/contrib/labs/jniacl/src/jniacl_main.cc @@ -0,0 +1,39 @@ +#include <jni.h> +#include <string> + +#include <arm_compute/graph/Graph.h> +#include <arm_compute/graph/Nodes.h> + +#include "io_accessor.h" + +extern "C" JNIEXPORT jstring JNICALL +Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject) +{ + using arm_compute::DataType; + using arm_compute::graph::Tensor; + using arm_compute::graph::TargetHint; + using arm_compute::graph::Graph; + using arm_compute::TensorInfo; + using arm_compute::TensorShape; + + arm_compute::graph::Graph graph; + TargetHint target_hint = TargetHint::OPENCL; + bool autoinc = true; + + graph << target_hint + << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32), + std::unique_ptr<InputAccessor>(new InputAccessor(autoinc))) + << arm_compute::graph::ConvolutionLayer( + 3U, 3U, 1U, + std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)), + std::unique_ptr<BiasAccessor>(new BiasAccessor()), + arm_compute::PadStrideInfo(1, 1, 0, 0)) + << Tensor(std::unique_ptr<OutputAccessor>(new OutputAccessor())); + ; + + graph.run(); + + std::string hello = "SoftMax Run OK"; + + return env->NewStringUTF(hello.c_str()); +} diff --git a/contrib/labs/kerneltesting/CMakeLists.txt b/contrib/labs/kerneltesting/CMakeLists.txt new file mode 100644 index 000000000..5792d0fe8 --- /dev/null +++ b/contrib/labs/kerneltesting/CMakeLists.txt @@ -0,0 +1,19 @@ +if(NOT ${TARGET_ARCH_BASE} STREQUAL "arm") + return() +endif(NOT ${TARGET_ARCH_BASE} STREQUAL "arm") + +nnfw_find_package(ARMCompute REQUIRED) + +function(add_kerneltesting TESTNAME SRC_FILES) + link_directories(${CMAKE_INSTALL_PREFIX}/lib) + add_executable(${TESTNAME} ${SRC_FILES}) + target_include_directories(${TESTNAME} PUBLIC + ${NNFW_INCLUDE_DIR}) + target_link_libraries(${TESTNAME} nnfw_lib_misc arm_compute_graph) + install(TARGETS ${TESTNAME} DESTINATION bin) +endfunction() + +# TODO: Enable conv2d on Tizen +if (NOT ${TARGET_OS} STREQUAL "tizen") + add_subdirectory(conv2d) +endif() diff --git a/contrib/labs/kerneltesting/conv2d/CMakeLists.txt b/contrib/labs/kerneltesting/conv2d/CMakeLists.txt new file mode 100644 index 000000000..25e01f584 --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/CMakeLists.txt @@ -0,0 +1,15 @@ +set(KERNELTESTING_CONV2D kerneltesting_conv2d) + +set(KERNELTESTING_CONV2D_SRCS "nnfw_conv2d_test.cpp" + "io_accessor.cpp") + +set(GEMLOWP_INCUDE ${TFLITE_DEPEND_DIR}/gemmlowp/public) +set(EIGN_INCLUDE ${TFLITE_DEPEND_DIR}/eigen + ${TFLITE_DEPEND_DIR}/eigen/Eigen) + +add_kerneltesting(${KERNELTESTING_CONV2D} "${KERNELTESTING_CONV2D_SRCS}") + +target_include_directories(${KERNELTESTING_CONV2D} PUBLIC + ${GEMLOWP_INCUDE} + ${EIGN_INCLUDE} + ) diff --git a/contrib/labs/kerneltesting/conv2d/OperationUtils.h b/contrib/labs/kerneltesting/conv2d/OperationUtils.h new file mode 100644 index 000000000..0beac80a4 --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/OperationUtils.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H +#define ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H + +#include <cstdint> +#include <vector> +#include <ostream> + +#define LOG(ERROR) std::cerr + +// Macro to check if the input parameters for operation are valid or not. +#define NN_CHECK(v) \ + do { \ + if (!(v)) { \ + LOG(ERROR) << "NN_CHECK failed: " << #v << "'\n"; \ + return false; \ + } \ + } while(0); + +#define NN_CHECK_EQ(actual, expected) \ + NN_CHECK((actual) == (expected)) + +#define NN_OPS_CHECK NN_CHECK + +enum PaddingScheme { + kPaddingUnknown = 0, + kPaddingSame = 1, + kPaddingValid = 2, +}; + +enum class FusedActivationFunc : int32_t { + NONE = 0, + RELU = 1, + RELU1 = 2, + RELU6 = 3, +}; + + +#define ANDROID_NN_MACRO_DISPATCH(macro) \ + switch (activation) { \ + case (int32_t) FusedActivationFunc::NONE: \ + macro(kNone); \ + break; \ + case (int32_t) FusedActivationFunc::RELU: \ + macro(kRelu); \ + break; \ + case (int32_t) FusedActivationFunc::RELU1: \ + macro(kRelu1); \ + break; \ + case (int32_t) FusedActivationFunc::RELU6: \ + macro(kRelu6); \ + break; \ + default: \ + LOG(ERROR) << "Unsupported fused activation function type"; \ + return false; \ + } + + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H diff --git a/contrib/labs/kerneltesting/conv2d/common.h b/contrib/labs/kerneltesting/conv2d/common.h new file mode 100644 index 000000000..8e675e664 --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/common.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_ +#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_ + +#ifndef USE_NEON +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#include <arm_neon.h> +#endif +#endif + +#include <gemmlowp.h> +#include "types.h" + +template <FusedActivationFunctionType Ac> +struct ActivationFunctionImpl {}; + +template <> +struct ActivationFunctionImpl<FusedActivationFunctionType::kNone> { + static float Eval(float x) { return x; } +}; + +template <> +struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu> { + static float Eval(float x) { return x < 0.f ? 0.f : x; } +}; + +template <> +struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu1> { + static float Eval(float x) { return x > 1.f ? 1.f : x < -1.f ? -1.f : x; } +}; + +template <> +struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu6> { + static float Eval(float x) { return x > 6.f ? 6.f : x < 0.f ? 0.f : x; } +}; + +template <FusedActivationFunctionType Ac> +float ActivationFunction(float x) { + return ActivationFunctionImpl<Ac>::Eval(x); +} + +inline int32 MultiplyByQuantizedMultiplierSmallerThanOne( + int32 x, int32 quantized_multiplier, int right_shift) { + using gemmlowp::RoundingDivideByPOT; + using gemmlowp::SaturatingRoundingDoublingHighMul; + return RoundingDivideByPOT( + SaturatingRoundingDoublingHighMul(x, quantized_multiplier), right_shift); +} + +inline int32 MultiplyByQuantizedMultiplierGreaterThanOne( + int32 x, int32 quantized_multiplier, int left_shift) { + using gemmlowp::SaturatingRoundingDoublingHighMul; + return SaturatingRoundingDoublingHighMul(x * (1 << left_shift), + quantized_multiplier); +} + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_ diff --git a/contrib/labs/kerneltesting/conv2d/compatibility.h b/contrib/labs/kerneltesting/conv2d/compatibility.h new file mode 100644 index 000000000..db8ba04bc --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/compatibility.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_ +#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_ + +#ifndef ANDROID_ML_NN_COMPATIBILITY +#define ANDROID_ML_NN_COMPATIBILITY + +#include <cassert> +#include <cstdint> + +#ifndef DCHECK +#define DCHECK(condition) (condition) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_EQ +#define DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_GE +#define DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_GT +#define DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_LE +#define DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_LT +#define DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : assert(false) +#endif + +#ifndef CHECK_EQ +#define CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false) +#endif + +using uint8 = std::uint8_t; +using int16 = std::int16_t; +using uint16 = std::uint16_t; +using int32 = std::int32_t; +using uint32 = std::uint32_t; + +#endif + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_ diff --git a/contrib/labs/kerneltesting/conv2d/io_accessor.cpp b/contrib/labs/kerneltesting/conv2d/io_accessor.cpp new file mode 100644 index 000000000..6d3cd9d04 --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/io_accessor.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "io_accessor.h" + +InputAccessor::InputAccessor(const float* inputData, const Shape& inputShape) + : _inputData(inputData) + , _inputShape(inputShape) +{ +} + +WeightAccessor::WeightAccessor(const float* filterData, const Shape& filterShape) + : _filterData(filterData) + , _filterShape(filterShape) +{ +} + +BiasAccessor::BiasAccessor(const float* biasData, const Shape& biasShape) + : _biasData(biasData) + , _biasShape(biasShape) +{ +} + +OutputAccessor::OutputAccessor(float* outputData, const Shape& outputShape) + : _outputData(outputData) + , _outputShape(outputShape) +{ +} + +bool InputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + uint32_t width = getSizeOfDimension(_inputShape, 2); + uint32_t offset = id.y() * width + id.x(); + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_inputData + offset); + }); + return true; +} + +bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + uint32_t width = getSizeOfDimension(_filterShape, 2); + uint32_t offset = id.y() * width + id.x(); + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_filterData + offset); + }); + return true; +} + +bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + uint32_t width = getSizeOfDimension(_biasShape, 2); + uint32_t offset = id.y() * width + id.x(); + *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = + *(_biasData + offset); + }); + return true; +} + +bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor) +{ + arm_compute::Window window; + window.use_tensor_dimensions(tensor.info()->tensor_shape()); + + execute_window_loop(window, [&](const arm_compute::Coordinates& id) + { + uint32_t width = getSizeOfDimension(_outputShape, 2); + uint32_t offset = id.y() * width + id.x(); + *(_outputData + offset) = + *reinterpret_cast<float *>(tensor.ptr_to_element(id)); + }); + return false; // end the network +} diff --git a/contrib/labs/kerneltesting/conv2d/io_accessor.h b/contrib/labs/kerneltesting/conv2d/io_accessor.h new file mode 100644 index 000000000..0201f7242 --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/io_accessor.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __CONV2D_IO_ACCESSOR_H__ +#define __CONV2D_IO_ACCESSOR_H__ + +#include <arm_compute/graph/ITensorAccessor.h> +#include <arm_compute/runtime/CL/CLFunctions.h> + +#include "types.h" + +class InputAccessor : public arm_compute::graph::ITensorAccessor +{ +public: + InputAccessor(const float* inputData, const Shape& inputShape); + InputAccessor(InputAccessor&&) = default; + + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor& tensor) override; + +private: + const float* _inputData; + const Shape& _inputShape; +}; + +class WeightAccessor : public arm_compute::graph::ITensorAccessor +{ +public: + WeightAccessor(const float* filterData, const Shape& filterShape); + WeightAccessor(WeightAccessor&&) = default; + + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor& tensor) override; + +private: + const float* _filterData; + const Shape& _filterShape; +}; + +class BiasAccessor : public arm_compute::graph::ITensorAccessor +{ +public: + BiasAccessor(const float* biasData, const Shape& biasShape); + BiasAccessor(BiasAccessor&&) = default; + + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor& tensor) override; + +private: + const float* _biasData; + const Shape& _biasShape; +}; + +class OutputAccessor : public arm_compute::graph::ITensorAccessor +{ +public: + OutputAccessor(float* outputData, const Shape& outputShape); + OutputAccessor(OutputAccessor&&) = default; + + // Inherited methods overriden: + bool access_tensor(arm_compute::ITensor& tensor) override; + +private: + float* _outputData; + const Shape& _outputShape; +}; + +#endif // __CONV2D_IO_ACCESSOR_H__ diff --git a/contrib/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp b/contrib/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp new file mode 100644 index 000000000..190be016e --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp @@ -0,0 +1,607 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <iostream> +#include <vector> +#include <cassert> + +#include <Eigen/Core> +#include <gemmlowp.h> + +#include "types.h" +#include "common.h" +#include "optimized_ops.h" +#include "OperationUtils.h" + +#include <arm_compute/graph.h> + +#include <arm_compute/runtime/CL/CLFunctions.h> +#include <arm_compute/runtime/CL/functions/CLConvolution.h> + +#include "io_accessor.h" +#include "misc/environment.h" + +static constexpr int kStaticBufferSize = 1605632; +static char static_scratch_buffer[kStaticBufferSize]; + +#define ANDROID_NN_CONV_PARAMETERS(Type) \ + uint32_t height = getSizeOfDimension(inputShape, 1); \ + uint32_t width = getSizeOfDimension(inputShape, 2); \ + uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \ + uint32_t filterWidth = getSizeOfDimension(filterShape, 2); \ + uint32_t outHeight = getSizeOfDimension(outputShape, 1); \ + uint32_t outWidth = getSizeOfDimension(outputShape, 2); \ + uint32_t inDepth = getSizeOfDimension(inputShape, 3); \ + \ + uint32_t paddingHeight = (uint32_t)padding_top; \ + uint32_t paddingWidth = (uint32_t)padding_left; \ + \ + Dims<4> im2colDim; \ + im2colDim.sizes[3] = (int)getSizeOfDimension(outputShape, 0); \ + im2colDim.sizes[2] = (int)getSizeOfDimension(outputShape, 1); \ + im2colDim.sizes[1] = (int)getSizeOfDimension(outputShape, 2); \ + im2colDim.sizes[0] = (int)inDepth * filterHeight * filterWidth; \ + \ + im2colDim.strides[0] = 1; \ + for (int i=1; i<4; i++) { \ + im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1]; \ + } \ + \ + Type* im2colData = nullptr; \ + int im2colByteSize = sizeof(Type); \ + for (int i=0; i<4; i++) { \ + im2colByteSize *= im2colDim.sizes[i]; \ + } \ + if (im2colByteSize <= kStaticBufferSize) { \ + im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ + } else { \ + im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ + } + + +bool convFloat32(const float* inputData, const Shape& inputShape, + const float* filterData, const Shape& filterShape, + const float* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const Shape& outputShape) { + + ANDROID_NN_CONV_PARAMETERS(float) + + #define ANDROID_NN_CONV(activation) \ + Conv<FusedActivationFunctionType::activation>( \ + inputData, convertShapeToDims(inputShape), \ + filterData, convertShapeToDims(filterShape), \ + biasData, convertShapeToDims(biasShape), \ + stride_width, stride_height, paddingWidth, paddingHeight, \ + outputData, convertShapeToDims(outputShape), \ + im2colData, im2colDim) + + ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_CONV) + + #undef ANDROID_NN_CONV + + if (im2colByteSize > kStaticBufferSize) { + delete[] im2colData; + } + return true; +} + +//----------------------------------------------------------------------------- + +using arm_compute::DataType; +using arm_compute::graph::Target; +using arm_compute::graph::TensorDescriptor; +using arm_compute::TensorShape; +using arm_compute::graph::frontend::InputLayer; +using arm_compute::graph::frontend::OutputLayer; + +namespace acl_graph { + +bool convFloat32(const float* inputData, const Shape& inputShape, + const float* filterData, const Shape& filterShape, + const float* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const Shape& outputShape) +{ + // Try with simple build-run with ACL Layer + arm_compute::graph::frontend::Stream graph{0, "ACL_CONV2D_TEST"}; + + Target target_hint = nnfw::misc::get_env_int("NNFW_ACL_USENEON") + ? Target::NEON : Target::CL; + + // Not sure about which index is which value + uint32_t tsi_c = getSizeOfDimension(inputShape, 0); + uint32_t tsi_h = getSizeOfDimension(inputShape, 1); + uint32_t tsi_w = getSizeOfDimension(inputShape, 2); + uint32_t tsi_n = getSizeOfDimension(inputShape, 3); + + uint32_t tsk_h = getSizeOfDimension(filterShape, 1); + uint32_t tsk_w = getSizeOfDimension(filterShape, 2); + uint32_t tsk_n = getSizeOfDimension(filterShape, 3); + + graph << target_hint + << InputLayer(TensorDescriptor(TensorShape(tsi_w, tsi_h, tsi_c, tsi_n), DataType::F32), + std::unique_ptr<InputAccessor>(new InputAccessor(inputData, inputShape))) + << arm_compute::graph::frontend::ConvolutionLayer( + tsk_w, tsk_h, tsk_n, + std::unique_ptr<WeightAccessor>(new WeightAccessor(filterData, filterShape)), + std::unique_ptr<BiasAccessor>(new BiasAccessor(biasData, biasShape)), + arm_compute::PadStrideInfo(stride_width, stride_height, padding_top, padding_bottom)) + ; + if (activation != static_cast<int32_t>(FusedActivationFunc::NONE)) { + arm_compute::ActivationLayerInfo::ActivationFunction actFunc = + arm_compute::ActivationLayerInfo::ActivationFunction::RELU; + + graph << arm_compute::graph::frontend::ActivationLayer(arm_compute::ActivationLayerInfo(actFunc)); + // Activation does not provide output Tensor and makes next layer fail to add to graph + // when it's the last(output) layer. To solve this, need to add a dummy layer. + uint32_t tso_c = getSizeOfDimension(outputShape, 0); + uint32_t tso_h = getSizeOfDimension(outputShape, 1); + uint32_t tso_w = getSizeOfDimension(outputShape, 2); + uint32_t tso_n = getSizeOfDimension(outputShape, 3); + graph << arm_compute::graph::frontend::ReshapeLayer(TensorShape(tso_w, tso_h, tso_c, tso_n)); + } + graph << OutputLayer(std::unique_ptr<OutputAccessor>(new OutputAccessor(outputData, outputShape))) + ; + + graph.run(); + + return true; +} + +} // namespace acl_graph + +//----------------------------------------------------------------------------- + +using arm_compute::TensorInfo; + +namespace acl_runtime { + +TensorShape calculate_convolution_layer_output_shape( + const arm_compute::TensorShape &input_shape, + const arm_compute::TensorShape &weights_shape, + const arm_compute::PadStrideInfo &conv_info) +{ + unsigned int output_width = 0; + unsigned int output_height = 0; + + // Get output width and height + std::tie(output_width, output_height) = + arm_compute::scaled_dimensions( + input_shape.x(), input_shape.y(), + weights_shape.x(), weights_shape.y(), + conv_info); + + // Create output shape + TensorShape output_shape = input_shape; + output_shape.set(0, output_width); + output_shape.set(1, output_height); + output_shape.set(2, weights_shape[3]); + + return output_shape; +} + +bool convFloat32(const float* inputData, const Shape& inputShape, + const float* filterData, const Shape& filterShape, + const float* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const Shape& outputShape) +{ + arm_compute::CLScheduler::get().default_init(); + + uint32_t tsi_c = getSizeOfDimension(inputShape, 0); + uint32_t tsi_h = getSizeOfDimension(inputShape, 1); + uint32_t tsi_w = getSizeOfDimension(inputShape, 2); + uint32_t tsi_n = getSizeOfDimension(inputShape, 3); + + uint32_t tsk_h = getSizeOfDimension(filterShape, 1); + uint32_t tsk_w = getSizeOfDimension(filterShape, 2); + uint32_t tsk_n = getSizeOfDimension(filterShape, 3); + + TensorShape input_shape = TensorShape(tsi_w, tsi_h, tsi_c, tsi_n); + TensorShape filter_shape = TensorShape(tsi_w, tsi_h, tsi_c, tsi_n); + arm_compute::PadStrideInfo conv_info = + arm_compute::PadStrideInfo(stride_width, stride_height, padding_top, padding_bottom); + + TensorShape output_shape = calculate_convolution_layer_output_shape( + input_shape, filter_shape, conv_info); + + uint32_t tso_c = output_shape[0]; + uint32_t tso_w = output_shape[1]; + uint32_t tso_h = output_shape[2]; + uint32_t tso_n = output_shape[3]; + + arm_compute::CLTensor input, output, bias, filter; + + input.allocator()->init(TensorInfo(tsi_w, tsi_h, arm_compute::Format::F32)); + output.allocator()->init(TensorInfo(tso_w, tso_h, arm_compute::Format::F32)); + bias.allocator()->init(TensorInfo(tso_w, tso_h, arm_compute::Format::F32)); + filter.allocator()->init(TensorInfo(tsk_w, tsk_h, arm_compute::Format::F32)); + + input.allocator()->allocate(); + output.allocator()->allocate(); + bias.allocator()->allocate(); + filter.allocator()->allocate(); + + input.map(); + InputAccessor ia(inputData, inputShape); + ia.access_tensor(input); + input.unmap(); + + bias.map(); + BiasAccessor ba(biasData, biasShape); + ba.access_tensor(bias); + bias.unmap(); + + filter.map(); + WeightAccessor fa(filterData, filterShape); + fa.access_tensor(filter); + filter.unmap(); + + arm_compute::CLConvolutionLayer conv_f; + conv_f.configure(&input, &filter, &bias, &output, conv_info); + + arm_compute::CLScheduler::get().sync(); + + conv_f.run(); + + output.map(); + OutputAccessor oa(outputData, outputShape); + oa.access_tensor(output); + output.unmap(); + + return true; +} + +} // namespace acl_runtime + +//----------------------------------------------------------------------------- + +enum COMPUTE_TYPE { + COMPUTE_DEFAULT = 0, + COMPUTE_ACLGRAPH, + COMPUTE_ACLRT +}; + +bool convFloat32(const float* inputData, const Shape& inputShape, + const float* filterData, const Shape& filterShape, + const float* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const Shape& outputShape, + COMPUTE_TYPE compType) { + + switch (compType) + { + case COMPUTE_DEFAULT : + return convFloat32(inputData, inputShape, filterData, filterShape, + biasData, biasShape, padding_left, padding_right, + padding_top, padding_bottom, stride_width, stride_height, + activation, outputData, outputShape); + + case COMPUTE_ACLGRAPH : + return acl_graph::convFloat32(inputData, inputShape, filterData, filterShape, + biasData, biasShape, padding_left, padding_right, + padding_top, padding_bottom, stride_width, stride_height, + activation, outputData, outputShape); + + case COMPUTE_ACLRT : + return acl_runtime::convFloat32(inputData, inputShape, filterData, filterShape, + biasData, biasShape, padding_left, padding_right, + padding_top, padding_bottom, stride_width, stride_height, + activation, outputData, outputShape); + } + return false; +} + +//----------------------------------------------------------------------------- + +void dumpData(const char* name, const float* data, const Shape& shape) +{ + uint32_t height = getSizeOfDimension(shape, 1); + uint32_t width = getSizeOfDimension(shape, 2); + + std::cout << "---" << name << "---" << std::endl; + for (int h = 0; h < height; h++) { + std::cout << "H=" << h << " | "; + for (int w = 0; w < width; w++) { + std::cout << data[h * width + w] << ","; + } + std::cout << std::endl; + } +} + +void initData(float* outputData, int num, float value) +{ + for (int i = 0; i < num; i++) { + *(outputData + i) = value; + } +} + +void initDataSeq(float* outputData, int num, float value) +{ + for (int i = 0; i < num; i++) { + *(outputData + i) = value; + value += 1.0; + } +} + +// compareData +// return true if result == expected with the shape info, +// otherwise false +bool compareData(const float* result, const float* expected, const Shape& shape) +{ + NN_CHECK_EQ(shape.dimensions.size(), 4); + + uint32_t height = getSizeOfDimension(shape, 1); + uint32_t width = getSizeOfDimension(shape, 2); + uint32_t numitems = height * width; + for (int item = 0; item < numitems; item++) { + if (*(result + item) != *(expected + item)) { + LOG(ERROR) << "compareData failed: result " << *(result + item) + << ", expected " << *(expected + item) << std::endl; + return false; + } + } + return true; +} + +int test_3x3_1x1_one(COMPUTE_TYPE comptype) +{ + float inputData[9]; + const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { 1.0 }; + const Shape biasShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + int32_t padding_left = 0; + int32_t padding_right = 0; + int32_t padding_top = 0; + int32_t padding_bottom = 0; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float* outputData = new float[9]; + const Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + float* expectData = new float[9]; + bool bret; + + initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + initData(expectData, sizeof(expectData) / sizeof(expectData[0]), 0.0); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + expectData, outputShape, + COMPUTE_DEFAULT); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape, + comptype); + + dumpData("Input ", inputData, inputShape); + dumpData("Filter ", filterData, filterShape); + dumpData("Bias ", biasData, biasShape); + dumpData("Output ", outputData, outputShape); + std::cout << std::endl; + + bret = compareData(outputData, expectData, outputShape); + + delete outputData; + delete expectData; + + if (!bret) + { + LOG(ERROR) << "TEST FAILED " << __FUNCTION__ << std::endl; + return -1; + } + return 0; +} + +int test_3x3_3x3_one(void) +{ + float inputData[9]; + const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { 1.0 }; + const Shape biasShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float* outputData = new float[9]; + const Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float* expectData = new float[9]; + bool bret; + + initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + initData(expectData, sizeof(expectData) / sizeof(expectData[0]), 0.0); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + expectData, outputShape, + COMPUTE_DEFAULT); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape, + COMPUTE_ACLGRAPH); + + dumpData("Input ", inputData, inputShape); + dumpData("Filter ", filterData, filterShape); + dumpData("Bias ", biasData, biasShape); + dumpData("Output ", outputData, outputShape); + std::cout << std::endl; + + bret = compareData(outputData, expectData, outputShape); + + delete outputData; + delete expectData; + + if (!bret) + { + LOG(ERROR) << "TEST FAILED " << __FUNCTION__ << std::endl; + return -1; + } + return 0; +} + +int test_3x3_3x3_seq(void) +{ + float inputData[9]; + const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float filterData[9]; + const Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float biasData[1] = { 1.0 }; + const Shape biasShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 }; + int32_t padding_left = 1; + int32_t padding_right = 1; + int32_t padding_top = 1; + int32_t padding_bottom = 1; + int32_t stride_width = 1; + int32_t stride_height = 1; + int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU); + float* outputData = new float[9]; + const Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 }; + float* expectData = new float[9]; + bool bret; + + initDataSeq(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0); + initDataSeq(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0); + initDataSeq(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0); + initData(expectData, sizeof(expectData) / sizeof(expectData[0]), 0.0); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + expectData, outputShape, + COMPUTE_DEFAULT); + + bret = convFloat32(inputData, inputShape, + filterData, filterShape, + biasData, biasShape, + padding_left, padding_right, + padding_top, padding_bottom, + stride_width, stride_height, + activation, + outputData, outputShape, + COMPUTE_ACLGRAPH); + + dumpData("Input ", inputData, inputShape); + dumpData("Filter ", filterData, filterShape); + dumpData("Bias ", biasData, biasShape); + dumpData("Output ", outputData, outputShape); + std::cout << std::endl; + + bret = compareData(outputData, expectData, outputShape); + + delete outputData; + delete expectData; + + if (!bret) + { + LOG(ERROR) << "TEST FAILED " << __FUNCTION__ << std::endl; + return -1; + } + return 0; +} + +int main(int argc, char* argv[]) +{ + int result; + + // input 3x3, output 1x1, all data 1.0 + result = test_3x3_1x1_one(COMPUTE_ACLGRAPH); + if (result) return result; + result = test_3x3_1x1_one(COMPUTE_ACLRT); + if (result) return result; + + // input 3x3, output 3x3, all data 1.0 + result = test_3x3_3x3_one(); + if (result) return result; + + result = test_3x3_3x3_seq(); + if (result) return result; + + return result; +} diff --git a/contrib/labs/kerneltesting/conv2d/optimized_ops.h b/contrib/labs/kerneltesting/conv2d/optimized_ops.h new file mode 100644 index 000000000..1d8c4ff28 --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/optimized_ops.h @@ -0,0 +1,339 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_ +#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_ + +// Make a local VectorMap typedef allowing to map a float array +// as a Eigen matrix expression. The same explanation as for VectorMap +// above also applies here. +template <typename Scalar> +using MatrixMap = typename std::conditional< + std::is_const<Scalar>::value, + Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, + Eigen::Dynamic, Eigen::Dynamic>>, + Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type; + +template <typename Scalar, int N> +MatrixMap<Scalar> MapAsMatrixWithFirstDimAsRows(Scalar* data, + const Dims<N>& dims) { + const int rows = dims.sizes[0]; + int cols = 1; + for (int d = 1; d < N; d++) { + cols *= dims.sizes[d]; + } + return MatrixMap<Scalar>(data, rows, cols); +} + +template <typename Scalar, int N> +MatrixMap<Scalar> MapAsMatrixWithLastDimAsCols(Scalar* data, + const Dims<N>& dims) { + const int cols = dims.sizes[N - 1]; + int rows = 1; + for (int d = 0; d < N - 1; d++) { + rows *= dims.sizes[d]; + } + return MatrixMap<Scalar>(data, rows, cols); +} + +template <typename T> +inline void ExtractPatchIntoBufferColumn( + const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth, + int stride_width, int stride_height, int pad_width, int pad_height, + int in_width, int in_height, int in_depth, int single_buffer_length, + int buffer_id, const T* in_data, T* conv_buffer_data, uint8 byte_zero) { + gemmlowp::ScopedProfilingLabel label("ExtractPatchIntoBufferColumn"); + // This chunk of code reshapes all the inputs corresponding to + // output (b, h, w) to a column vector in conv_buffer(:, buffer_id). + const int kwidth_times_indepth = kwidth * in_depth; + const int inwidth_times_indepth = in_width * in_depth; + const int ih_ungated_start = h * stride_height - pad_height; + const int ih_ungated_end = (ih_ungated_start + kheight); + const int ih_end = std::min(ih_ungated_end, in_height); + const int iw_ungated_start = w * stride_width - pad_width; + const int iw_ungated_end = (iw_ungated_start + kwidth); + const int iw_end = std::min(iw_ungated_end, in_width); + // If the patch is off the edge of the input image, skip writing those rows + // and columns from the patch into the output array. + const int h_offset = std::max(0, -ih_ungated_start); + const int w_offset = std::max(0, -iw_ungated_start); + const int ih_start = std::max(0, ih_ungated_start); + const int iw_start = std::max(0, iw_ungated_start); + const int single_row_num = + std::min(kwidth - w_offset, in_width - iw_start) * in_depth; + const int output_row_offset = (buffer_id * single_buffer_length); + int out_offset = + output_row_offset + (h_offset * kwidth + w_offset) * in_depth; + int in_offset = Offset(input_dims, 0, iw_start, ih_start, b); + + // Express all of the calculations as padding around the input patch. + const int top_padding = h_offset; + const int bottom_padding = (ih_ungated_end - ih_end); + const int left_padding = w_offset; + const int right_padding = (iw_ungated_end - iw_end); + assert(single_row_num == + ((kwidth - (left_padding + right_padding)) * in_depth)); + + // Write out zeroes to the elements representing the top rows of the input + // patch that are off the edge of the input image. + if (top_padding > 0) { + const int top_row_elements = (top_padding * kwidth * in_depth); + memset(conv_buffer_data + output_row_offset, byte_zero, + (top_row_elements * sizeof(T))); + } + + // If the patch is on the interior of the input image horizontally, just copy + // over the rows sequentially, otherwise add zero padding at the start or end. + if ((left_padding == 0) && (right_padding == 0)) { + for (int ih = ih_start; ih < ih_end; ++ih) { + memcpy(conv_buffer_data + out_offset, in_data + in_offset, + single_row_num * sizeof(T)); + out_offset += kwidth_times_indepth; + in_offset += inwidth_times_indepth; + } + } else { + for (int ih = ih_start; ih < ih_end; ++ih) { + if (left_padding > 0) { + const int left_start = (out_offset - (left_padding * in_depth)); + memset(conv_buffer_data + left_start, byte_zero, + (left_padding * in_depth * sizeof(T))); + } + memcpy(conv_buffer_data + out_offset, in_data + in_offset, + single_row_num * sizeof(T)); + if (right_padding > 0) { + const int right_start = (out_offset + single_row_num); + memset(conv_buffer_data + right_start, byte_zero, + (right_padding * in_depth * sizeof(T))); + } + out_offset += kwidth_times_indepth; + in_offset += inwidth_times_indepth; + } + } + + // If the bottom of the patch falls off the input image, pad the values + // representing those input rows with zeroes. + if (bottom_padding > 0) { + const int bottom_row_elements = (bottom_padding * kwidth * in_depth); + const int bottom_start = + output_row_offset + + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth); + memset(conv_buffer_data + bottom_start, byte_zero, + (bottom_row_elements * sizeof(T))); + } +} + +#ifdef USE_NEON +template <FusedActivationFunctionType Ac> +void AddBiasAndEvalActivationFunction(const float* bias_data, + const Dims<4>& bias_dims, + float* array_data, + const Dims<4>& array_dims) { + gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction"); + const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3]; + const int array_size = array_dims.sizes[3] * array_dims.strides[3]; + DCHECK_EQ((array_size % bias_size), 0); + float* array_ptr = array_data; + float* array_end_ptr = array_ptr + array_size; + const auto zero = vdupq_n_f32(0); + const auto six = vdupq_n_f32(6); + const auto neg_one = vdupq_n_f32(-1); + const auto one = vdupq_n_f32(1); + for (; array_ptr != array_end_ptr; array_ptr += bias_size) { + int i = 0; + for (; i <= bias_size - 16; i += 16) { + auto b0 = vld1q_f32(bias_data + i); + auto b1 = vld1q_f32(bias_data + i + 4); + auto b2 = vld1q_f32(bias_data + i + 8); + auto b3 = vld1q_f32(bias_data + i + 12); + auto a0 = vld1q_f32(array_ptr + i); + auto a1 = vld1q_f32(array_ptr + i + 4); + auto a2 = vld1q_f32(array_ptr + i + 8); + auto a3 = vld1q_f32(array_ptr + i + 12); + auto x0 = vaddq_f32(a0, b0); + auto x1 = vaddq_f32(a1, b1); + auto x2 = vaddq_f32(a2, b2); + auto x3 = vaddq_f32(a3, b3); + if (Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6) { + x0 = vmaxq_f32(zero, x0); + x1 = vmaxq_f32(zero, x1); + x2 = vmaxq_f32(zero, x2); + x3 = vmaxq_f32(zero, x3); + if (Ac == FusedActivationFunctionType::kRelu6) { + x0 = vminq_f32(six, x0); + x1 = vminq_f32(six, x1); + x2 = vminq_f32(six, x2); + x3 = vminq_f32(six, x3); + } + } else if (Ac == FusedActivationFunctionType::kRelu1) { + x0 = vmaxq_f32(neg_one, x0); + x1 = vmaxq_f32(neg_one, x1); + x2 = vmaxq_f32(neg_one, x2); + x3 = vmaxq_f32(neg_one, x3); + x0 = vminq_f32(one, x0); + x1 = vminq_f32(one, x1); + x2 = vminq_f32(one, x2); + x3 = vminq_f32(one, x3); + } + vst1q_f32(array_ptr + i, x0); + vst1q_f32(array_ptr + i + 4, x1); + vst1q_f32(array_ptr + i + 8, x2); + vst1q_f32(array_ptr + i + 12, x3); + } + for (; i <= bias_size - 4; i += 4) { + auto b = vld1q_f32(bias_data + i); + auto a = vld1q_f32(array_ptr + i); + auto x = vaddq_f32(a, b); + if (Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6) { + x = vmaxq_f32(zero, x); + if (Ac == FusedActivationFunctionType::kRelu6) { + x = vminq_f32(six, x); + } + } else if (Ac == FusedActivationFunctionType::kRelu1) { + x = vmaxq_f32(neg_one, x); + x = vminq_f32(one, x); + } + vst1q_f32(array_ptr + i, x); + } + for (; i < bias_size; i++) { + array_ptr[i] = ActivationFunction<Ac>(array_ptr[i] + bias_data[i]); + } + } +} +#else // not NEON +template <FusedActivationFunctionType Ac> +void AddBiasAndEvalActivationFunction(const float* bias_data, + const Dims<4>& bias_dims, + float* array_data, + const Dims<4>& array_dims) { + gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction"); + const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3]; + const int array_size = array_dims.sizes[3] * array_dims.strides[3]; + DCHECK_EQ((array_size % bias_size), 0); + for (int array_offset = 0; array_offset < array_size; + array_offset += bias_size) { + for (int i = 0; i < bias_size; i++) { + array_data[array_offset + i] = + ActivationFunction<Ac>(array_data[array_offset + i] + bias_data[i]); + } + } +} +#endif + +template <typename Lhs, typename Rhs, typename Result> +void Gemm(const Eigen::MatrixBase<Lhs>& lhs, const Eigen::MatrixBase<Rhs>& rhs, + Eigen::MatrixBase<Result>* result) { + if (rhs.cols() == 1) { + gemmlowp::ScopedProfilingLabel label("GEMV"); + result->col(0).noalias() = lhs * rhs.col(0); + } else { + gemmlowp::ScopedProfilingLabel label("GEMM"); + result->noalias() = lhs * rhs; + } +} + +template <typename T> +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width, + int stride_height, int pad_width, int pad_height, int kheight, + int kwidth, uint8 byte_zero, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Im2col"); + DCHECK(IsPackedWithoutStrides(input_dims)); + DCHECK(IsPackedWithoutStrides(output_dims)); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_depth = ArraySize(input_dims, 0); + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + const int output_depth = ArraySize(output_dims, 0); + const int output_width = ArraySize(output_dims, 1); + const int output_height = ArraySize(output_dims, 2); + + int buffer_id = 0; + // Loop over the output nodes. + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < output_height; ++h) { + for (int w = 0; w < output_width; ++w) { + ExtractPatchIntoBufferColumn( + input_dims, w, h, b, kheight, kwidth, stride_width, stride_height, + pad_width, pad_height, input_width, input_height, input_depth, + output_depth, buffer_id, input_data, output_data, byte_zero); + ++buffer_id; + } + } + } +} + +template <FusedActivationFunctionType Ac> +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + (void)im2col_data; + (void)im2col_dims; + gemmlowp::ScopedProfilingLabel label("Conv"); + + const float* gemm_input_data = nullptr; + const Dims<4>* gemm_input_dims = nullptr; + const int filter_width = ArraySize(filter_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const bool need_im2col = stride_width != 1 || stride_height != 1 || + filter_width != 1 || filter_height != 1; + if (need_im2col) { + DCHECK(im2col_data); + Im2col(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_height, filter_width, 0, im2col_data, + im2col_dims); + gemm_input_data = im2col_data; + gemm_input_dims = &im2col_dims; + } else { + DCHECK(!im2col_data); + gemm_input_data = input_data; + gemm_input_dims = &input_dims; + } + + const auto im2col_matrix_map = + MapAsMatrixWithFirstDimAsRows(gemm_input_data, *gemm_input_dims); + const auto filter_matrix_map = + MapAsMatrixWithLastDimAsCols(filter_data, filter_dims); + auto output_matrix_map = + MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map); + + AddBiasAndEvalActivationFunction<Ac>(bias_data, bias_dims, output_data, + output_dims); +} + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_ diff --git a/contrib/labs/kerneltesting/conv2d/types.h b/contrib/labs/kerneltesting/conv2d/types.h new file mode 100644 index 000000000..3d09457c7 --- /dev/null +++ b/contrib/labs/kerneltesting/conv2d/types.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_ +#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_ + +enum class OperandType : int32_t { + FLOAT32 = 0, + INT32 = 1, + UINT32 = 2, + TENSOR_FLOAT32 = 3, + TENSOR_INT32 = 4, + TENSOR_QUANT8_ASYMM = 5, + OEM = 10000, + TENSOR_OEM_BYTE = 10001, +}; + +#include "compatibility.h" + +enum class FusedActivationFunctionType { kNone, kRelu6, kRelu1, kRelu }; + +template <int N> +struct Dims { + int sizes[N]; + int strides[N]; +}; + +// The type and dimensions of an operand. +struct Shape { + OperandType type; + std::vector<uint32_t> dimensions; + float scale; + int32_t offset; +}; + +inline uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) { + if (dimensionIdx >= shape.dimensions.size()) { + // TODO, log the error + return 0; + } + return shape.dimensions[dimensionIdx]; +} + +inline Dims<4> convertShapeToDims(const Shape& shape) { + Dims<4> dims; + for (int i=0; i<4; i++) { + dims.sizes[i] = 1; + } + + if (shape.dimensions.size() == 1) { + dims.sizes[0] = (int)getSizeOfDimension(shape, 0); + } else { + for (int i=0; i<4; i++) { + int src = (int)shape.dimensions.size()-i-1; + if (src >= 0) { + dims.sizes[i] = (int)getSizeOfDimension(shape, src); + } + } + } + + dims.strides[0] = 1; + for (int i = 1; i<4; i++) { + dims.strides[i] = dims.strides[i-1] * dims.sizes[i-1]; + } + return dims; +} + +inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) { + DCHECK(i0 >= 0 && i0 < dims.sizes[0]); + DCHECK(i1 >= 0 && i1 < dims.sizes[1]); + DCHECK(i2 >= 0 && i2 < dims.sizes[2]); + DCHECK(i3 >= 0 && i3 < dims.sizes[3]); + return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] + + i3 * dims.strides[3]; +} + +// Get array size, DCHECKing that the dim index is in range. +template <int N> +int ArraySize(const Dims<N>& array, int index) { + DCHECK(index >= 0 && index < N); + return array.sizes[index]; +} + +// Get common array size, DCHECKing that they all agree. +template <typename ArrayType1, typename ArrayType2> +int MatchingArraySize(const ArrayType1& array1, int index1, + const ArrayType2& array2, int index2) { + DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); + return ArraySize(array1, index1); +} + +template <typename ArrayType1, typename ArrayType2, typename... Args> +int MatchingArraySize(const ArrayType1& array1, int index1, + const ArrayType2& array2, int index2, Args... args) { + DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); + return MatchingArraySize(array1, index1, args...); +} + +inline int RequiredBufferSizeForDims(const Dims<4>& dims) { + int max_offset = 0; + for (int i = 0; i < 4; i++) { + max_offset += (dims.sizes[i] - 1) * dims.strides[i]; + } + return max_offset + 1; +} + +template <int N> +bool IsPackedWithoutStrides(const Dims<N>& dims) { + int expected_stride = 1; + for (int d = 0; d < N; d++) { + if (dims.strides[d] != expected_stride) return false; + expected_stride *= dims.sizes[d]; + } + return true; +} + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_ diff --git a/contrib/labs/opencl_test/CMakeLists.txt b/contrib/labs/opencl_test/CMakeLists.txt new file mode 100644 index 000000000..5e99fa051 --- /dev/null +++ b/contrib/labs/opencl_test/CMakeLists.txt @@ -0,0 +1,12 @@ +if(NOT ${TARGET_ARCH_BASE} STREQUAL "arm") + return() +endif(NOT ${TARGET_ARCH_BASE} STREQUAL "arm") + +list(APPEND OPENCL_INFO_SOURCE "src/opencl_test.cc") + +add_executable(opencl_test ${OPENCL_INFO_SOURCE}) +target_include_directories(opencl_test PUBLIC ${CMAKE_SOURCE_DIR}/externals/acl) +target_include_directories(opencl_test PUBLIC ${CMAKE_SOURCE_DIR}/externals/acl/include) +target_include_directories(opencl_test PUBLIC ${CMAKE_SOURCE_DIR}/libs/ARMComputeEx) +target_link_libraries(opencl_test arm_compute) +target_link_libraries(opencl_test arm_compute_ex) diff --git a/contrib/labs/opencl_test/README.md b/contrib/labs/opencl_test/README.md new file mode 100644 index 000000000..950528f81 --- /dev/null +++ b/contrib/labs/opencl_test/README.md @@ -0,0 +1,8 @@ +This directory contains experients of OpenCL code. + +How to run: +``` +LD_LIBRARY_PATH=Product/out/lib Product/obj/contrib/opencl_test/opencl_test [option] +``` + - `[option]` + - `-g`: prints devices inside GPU and check if they use same memory address diff --git a/contrib/labs/opencl_test/src/opencl_test.cc b/contrib/labs/opencl_test/src/opencl_test.cc new file mode 100644 index 000000000..93994ae43 --- /dev/null +++ b/contrib/labs/opencl_test/src/opencl_test.cc @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/******************************************************************************* + * Copyright (c) 2008-2015 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/OpenCLEx.h" + +#include <iostream> +#include <vector> + +void printDeviceInfo(int n, cl::Device &device, cl::Device &default_device) +{ + bool is_default = (device() == default_device()); + std::cout << "\t\t\t#" << n << " Device: (id: " << device() << ") " + << (is_default ? " -> default" : "") << "\n"; + + const auto name = device.getInfo<CL_DEVICE_NAME>(); + std::cout << "\t\t\t\tName: " << name << "\n"; + + const auto compute_unit = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); + std::cout << "\t\t\t\tMax Compute Unit: " << compute_unit << "\n"; + + const auto max_work_item_size = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>(); + std::cout << "\t\t\t\tMax Work Item Size: ["; + for (auto size : max_work_item_size) + std::cout << size << ","; + std::cout << "]\n"; + + const auto max_work_group_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); + std::cout << "\t\t\t\tMax Work Grpup Size: " << max_work_group_size << "\n"; + + const auto max_clock_frequency = device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>(); + std::cout << "\t\t\t\tMax Clock Frequency: " << max_clock_frequency << "\n"; + + std::cout << "\n"; +} + + +class OpenCLGpu +{ + public: + cl::Platform platform_; + cl::Context context_; + cl::vector<cl::Device> devices_; + std::vector<cl::CommandQueue*> q_; + cl::Program program_; + + OpenCLGpu() + { + cl_int cl_error; + + platform_ = cl::Platform::getDefault(); + + try + { + cl_context_properties properties[3] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)platform_(), 0 + }; + + context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &cl_error); + } + catch (cl::Error &err) // thrown when there is no Context for this platform + { + std::cout << "\t\t No Context Found\n"; + return; + } + + devices_ = context_.getInfo<CL_CONTEXT_DEVICES>(); + + for (int dev_id = 0; dev_id < devices_.size(); dev_id++) + { + cl::CommandQueue* que = new cl::CommandQueue(context_, devices_[dev_id]); + q_.emplace_back(que); + } + } + + ~OpenCLGpu() + { + for (auto each_q : q_) + delete each_q; + } + + void buildProgram(std::string& kernel_source_code) + { + std::vector<std::string> programStrings {kernel_source_code}; + + program_ = cl::Program(context_, programStrings); + + try + { + program_.build("-cl-std=CL1.2"); + } + catch (cl::Error &err) + { + cl_int buildErr = CL_SUCCESS; + auto buildInfo = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr); + for (auto &pair : buildInfo) { + std::cerr << pair.second << std::endl << std::endl; + } + } + } +}; + + +void checkContextMem() +{ + cl_int cl_error; + + // get context, devices + // + std::cout << "\nChecking if devices in GPU shares the same memory address:\n\n"; + + OpenCLGpu gpu; + + std::cout << "\nDevices in GPU:\n\n"; + + auto &devices = gpu.devices_; + auto default_device = cl::Device::getDefault(); + + int d = 0; + for (auto device : devices) + printDeviceInfo(++d, device, default_device); + + if (d < 2) + { + std::cout << "\t\t This options works when there are n (>= 2) devices.\n"; + return; + } + + // allocate and map memory + + typedef cl_int T; + const int items_per_device = 128; + const int length = items_per_device * devices.size(); + + std::vector<T> input(length); + std::vector<T> output(length, 0); + + for (int i = 0; i < length; i++) + input[i] = i; + + cl::Buffer input_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), input.data(), &cl_error); + cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), output.data(), &cl_error); + + // compile test cl code + + std::string kernel_source { + "typedef int T; \n" \ + "kernel void memory_test( \n" \ + " const int dev_id, \n" \ + " global T* input, \n" \ + " global T* output, \n" \ + " const int start_idx, \n" \ + " const int count) \n" \ + "{ \n" \ + " int input_idx = get_global_id(0); \n" \ + " if(input_idx < count) \n" \ + " { \n" \ + " int output_idx = start_idx + input_idx; \n" \ + " output[output_idx] = input[input_idx] + dev_id; \n" \ + " } \n" \ + "} \n" + }; + + gpu.buildProgram(kernel_source); + + try + { + auto kernel_functor = cl::KernelFunctor<cl_int, cl::Buffer, cl::Buffer, cl_int, cl_int> + (gpu.program_, "memory_test"); // name should be same as cl function name + + // create a queue per device and queue a kernel job + + for (int dev_id = 0; dev_id < devices.size(); dev_id++) + { + kernel_functor( + cl::EnqueueArgs( + *(gpu.q_[dev_id]), + cl::NDRange(items_per_device)), + (cl_int)dev_id, // dev id + input_buf, + output_buf, + (cl_int)(items_per_device * dev_id), // start index + (cl_int)(items_per_device), // count + cl_error + ); + } + + // sync + + for (d = 0; d < devices.size(); d++) + (gpu.q_[d])->finish(); + + // check if memory state changed by all devices + + cl::copy(*(gpu.q_[0]), output_buf, begin(output), end(output)); + + bool use_same_memory = true; + + for (int dev_id = 0; dev_id < devices.size(); dev_id++) + { + for (int i = 0; i < items_per_device; ++i) + { + int output_idx = items_per_device * dev_id + i; + if (output[output_idx] != input[i] + dev_id) + { + std::cout << "Output[" << output_idx << "] : " + << "expected = " << input[i] + dev_id + << "; actual = " << output[output_idx] << "\n"; + use_same_memory = false; + break; + } + } + } + + if (use_same_memory) + std::cout << "\n=> Mapped memory addresses used by devices in GPU are same.\n\n"; + else + std::cout << "\n=> Mapped memory addresses used by devices in GPU are different.\n\n"; + } + catch (cl::Error &err) + { + std::cerr << "error: code: " << err.err() << ", what: " << err.what() << std::endl; + } +} + +void printHelp() +{ + std::cout << "opencl information: \n\n"; + std::cout << "\t -h : help\n"; + std::cout << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n"; + std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n"; +} + +#include <mutex> +#include <chrono> +#include <thread> +#include <condition_variable> + +#define MAX_DEVICE_NUM 8 // just for testing + +int kernel_idx[MAX_DEVICE_NUM]; +unsigned char kernel_completed = 0x00; // bit 0 = 1 means kernel by device[0] was completed. +unsigned char kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed +int device_num; +std::mutex kernel_complete_handler_mutex; + +std::condition_variable wakeup_main; +std::mutex wakeup_main_mutex; + +void notifyKernelFinished(cl_event ev, cl_int ev_info, void * device_idx) +{ + std::cout << "callback from device[" << *((int*)device_idx) << "] : ==> completed.\n"; + + std::unique_lock<std::mutex> lock(kernel_complete_handler_mutex); + + kernel_completed |= 0x01 << *((int*)device_idx); + if (kernel_completed == kernel_completed_flag) + wakeup_main.notify_one(); +} + +void testSync() +{ + OpenCLGpu gpu; + + cl_int cl_error; + typedef cl_int T; + const int items_per_device = 1024*768; + const int length = items_per_device * gpu.devices_.size(); + + std::vector<T> output(length, 0); + + cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), output.data(), &cl_error); + + std::string kernel_source { + "kernel void test(global float* output, const int count) \n" \ + "{ \n" \ + " int idx = get_global_id(0); \n" \ + " if(idx < count) \n" \ + " { \n" \ + " float x = hypot(idx/1.111, idx*1.111); \n" \ + " for (int y = 0; y < 200; y++) \n" \ + " x = rootn(log(pown(rootn(log(pown(x, 20)), 5), 20)), 5); \n" \ + " output[idx] = x; \n" \ + " } \n" \ + "} \n" + }; + + gpu.buildProgram(kernel_source); + + try + { + auto kernel_functor = cl::KernelFunctor<cl::Buffer, cl_int> + (gpu.program_, "test"); // name should be same as cl function name + + // variable init + cl::Event ev[MAX_DEVICE_NUM]; + + device_num = gpu.devices_.size(); + + kernel_completed = 0; + kernel_completed_flag = 0; + for (int i = 0; i < device_num; i++) + { + kernel_idx[i] = i; + kernel_completed_flag |= 0x01 << i; + } + + // create a queue per device and queue a kernel job + // queueing with callback function + for (int dev_id = 0; dev_id < gpu.devices_.size(); dev_id++) + { + ev[dev_id] = kernel_functor( + cl::EnqueueArgs( + *(gpu.q_[dev_id]), + cl::NDRange(items_per_device)), + output_buf, + (cl_int)(items_per_device), // count + cl_error + ); + ev[dev_id].setCallback(CL_COMPLETE, notifyKernelFinished, (void*)(kernel_idx+dev_id)); + + // how to check kernel execution status + // + // auto status = ev[dev_id].getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>(); + // std::cout << "Event status = " << (status == CL_QUEUED ? "CL_QUEUED" : status == CL_SUBMITTED ? "CL_SUBMITTED" : status == CL_COMPLETE ? "CL_COMPLETE" : "unknown") + // << std::endl; + // std::cout << "Event status code = " << status << std::endl; + } + + // long wait until kernels are over + { + std::unique_lock<std::mutex> lk(wakeup_main_mutex); + wakeup_main.wait(lk, []{ return (kernel_completed == kernel_completed_flag); }); + + std::cout << "all devices were completed.\n"; + } + } + catch (cl::Error &err) + { + std::cerr << "error: code: " << err.err() << ", what: " << err.what() << std::endl; + } + +} + +int main(const int argc, char **argv) +{ + if (argc < 2) + printHelp(); + else + { + std::string option = argv[1]; + + if (option == "-h") // help + printHelp(); + else if (option == "-g") // check if devices in GPU uses same memory address + checkContextMem(); + else if (option == "-s") // check synchronization between devices in GPU + testSync(); + } + return 0; +} diff --git a/contrib/labs/tflite_examples/CMakeLists.txt b/contrib/labs/tflite_examples/CMakeLists.txt new file mode 100644 index 000000000..463bc5531 --- /dev/null +++ b/contrib/labs/tflite_examples/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(tflite_conv_example "src/conv.cpp") +target_link_libraries(tflite_conv_example tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite) diff --git a/contrib/labs/tflite_examples/src/conv.cpp b/contrib/labs/tflite_examples/src/conv.cpp new file mode 100644 index 000000000..e517da9f3 --- /dev/null +++ b/contrib/labs/tflite_examples/src/conv.cpp @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tflite/ext/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/builtin_op_data.h" + +#include <iostream> + +using namespace tflite; +using namespace nnfw::tflite; + +namespace vector +{ + +template <typename T> struct View +{ + virtual ~View() = default; + + virtual int32_t size(void) const = 0; + virtual T at(uint32_t off) const = 0; +}; +} + +namespace feature +{ + +struct Shape +{ + int32_t C; + int32_t H; + int32_t W; +}; + +template <typename T> struct View +{ + virtual ~View() = default; + + virtual const Shape &shape(void) const = 0; + virtual T at(uint32_t ch, uint32_t row, uint32_t col) const = 0; +}; +} + +namespace kernel +{ + +struct Shape +{ + int32_t N; + int32_t C; + int32_t H; + int32_t W; +}; + +template <typename T> struct View +{ + virtual ~View() = default; + + virtual const Shape &shape(void) const = 0; + virtual T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const = 0; +}; +} + +const int32_t N = 1; +const int32_t C = 2; + +class SampleBiasObject final : public vector::View<float> +{ +public: + SampleBiasObject() : _size(N) + { + // DO NOTHING + } + +public: + int32_t size(void) const override { return _size; } + + float at(uint32_t off) const override { return 0.0f; } + +private: + int32_t _size; +}; + +class SampleFeatureObject final : public feature::View<float> +{ +public: + SampleFeatureObject() + { + _shape.C = C; + _shape.H = 3; + _shape.W = 4; + + const uint32_t size = _shape.C * _shape.H * _shape.W; + + for (uint32_t off = 0; off < size; ++off) + { + _value.emplace_back(off); + } + + assert(_value.size() == size); + } + +public: + const feature::Shape &shape(void) const override { return _shape; }; + + float at(uint32_t ch, uint32_t row, uint32_t col) const override + { + return _value.at(ch * _shape.H * _shape.W + row * _shape.W + col); + } + +public: + float &at(uint32_t ch, uint32_t row, uint32_t col) + { + return _value.at(ch * _shape.H * _shape.W + row * _shape.W + col); + } + +private: + feature::Shape _shape; + std::vector<float> _value; +}; + +class SampleKernelObject final : public kernel::View<float> +{ +public: + SampleKernelObject() + { + _shape.N = N; + _shape.C = C; + _shape.H = 3; + _shape.W = 4; + + const uint32_t size = _shape.N * _shape.C * _shape.H * _shape.W; + + for (uint32_t off = 0; off < size; ++off) + { + _value.emplace_back(off); + } + + assert(_value.size() == size); + } + +public: + const kernel::Shape &shape(void) const override { return _shape; }; + + float at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override + { + return _value.at(nth * _shape.C * _shape.H * _shape.W + ch * _shape.H * _shape.W + + row * _shape.W + col); + } + +private: + kernel::Shape _shape; + std::vector<float> _value; +}; + +int main(int argc, char **argv) +{ + const SampleFeatureObject ifm; + const SampleKernelObject kernel; + const SampleBiasObject bias; + + const int32_t IFM_C = ifm.shape().C; + const int32_t IFM_H = ifm.shape().H; + const int32_t IFM_W = ifm.shape().W; + + const int32_t KER_N = kernel.shape().N; + const int32_t KER_C = kernel.shape().C; + const int32_t KER_H = kernel.shape().H; + const int32_t KER_W = kernel.shape().W; + + const int32_t OFM_C = kernel.shape().N; + const int32_t OFM_H = (IFM_H - KER_H) + 1; + const int32_t OFM_W = (IFM_W - KER_W) + 1; + + // Assumption on this example + assert(IFM_C == KER_C); + assert(KER_N == bias.size()); + + // Comment from 'context.h' + // + // Parameters for asymmetric quantization. Quantized values can be converted + // back to float using: + // real_value = scale * (quantized_value - zero_point); + // + // Q: Is this necessary? + TfLiteQuantizationParams quantization; + + quantization.scale = 1; + quantization.zero_point = 0; + + Interpreter interp; + + // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) + interp.AddTensors(5); + + // Configure OFM + interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, + {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); + + // Configure IFM + interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, + {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); + + // Configure Filter + const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W; + float kernel_data[kernel_size] = { + 0.0f, + }; + + // Fill kernel data in NHWC order + { + uint32_t off = 0; + + for (uint32_t nth = 0; nth < KER_N; ++nth) + { + for (uint32_t row = 0; row < KER_H; ++row) + { + for (uint32_t col = 0; col < KER_W; ++col) + { + for (uint32_t ch = 0; ch < KER_C; ++ch) + { + const auto value = kernel.at(nth, ch, row, col); + kernel_data[off++] = value; + } + } + } + } + + assert(kernel_size == off); + } + + interp.SetTensorParametersReadOnly( + 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */, + quantization, reinterpret_cast<const char *>(kernel_data), sizeof(kernel_data)); + + // Configure Bias + const uint32_t bias_size = bias.size(); + float bias_data[bias_size] = { + 0.0f, + }; + + // Fill bias data + for (uint32_t off = 0; off < bias.size(); ++off) + { + bias_data[off] = bias.at(off); + } + + interp.SetTensorParametersReadOnly(3, kTfLiteFloat32 /* type */, "bias" /* name */, + {bias.size()} /* dims */, quantization, + reinterpret_cast<const char *>(bias_data), sizeof(bias_data)); + + // Add Convolution Node + // + // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free + // So, param should be allocated with malloc + TfLiteConvParams *param = reinterpret_cast<TfLiteConvParams *>(malloc(sizeof(TfLiteConvParams))); + + param->padding = kTfLitePaddingValid; + param->stride_width = 1; + param->stride_height = 1; + param->activation = kTfLiteActRelu; + + // Run Convolution and store its result into Tensor #0 + // - Read IFM from Tensor #1 + // - Read Filter from Tensor #2, + // - Read Bias from Tensor #3 + interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param), + BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D, 1)); + + // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 + interp.SetInputs({1}); + interp.SetOutputs({0}); + + // Let's use NNAPI (if possible) + interp.UseNNAPI(true); + + // Allocate Tensor + interp.AllocateTensors(); + + // Fill IFM data in HWC order + { + uint32_t off = 0; + + for (uint32_t row = 0; row < ifm.shape().H; ++row) + { + for (uint32_t col = 0; col < ifm.shape().W; ++col) + { + for (uint32_t ch = 0; ch < ifm.shape().C; ++ch) + { + const auto value = ifm.at(ch, row, col); + interp.typed_input_tensor<float>(0)[off++] = value; + } + } + } + } + + // Let's Rock-n-Roll! + interp.Invoke(); + + // Print OFM + { + uint32_t off = 0; + + for (uint32_t row = 0; row < OFM_H; ++row) + { + for (uint32_t col = 0; col < OFM_W; ++col) + { + for (uint32_t ch = 0; ch < kernel.shape().N; ++ch) + { + std::cout << interp.typed_output_tensor<float>(0)[off++] << std::endl; + } + } + } + } + + return 0; +} |