summaryrefslogtreecommitdiff
path: root/contrib/labs/kerneltesting
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/labs/kerneltesting')
-rw-r--r--contrib/labs/kerneltesting/CMakeLists.txt19
-rw-r--r--contrib/labs/kerneltesting/conv2d/CMakeLists.txt15
-rw-r--r--contrib/labs/kerneltesting/conv2d/OperationUtils.h90
-rw-r--r--contrib/labs/kerneltesting/conv2d/common.h89
-rw-r--r--contrib/labs/kerneltesting/conv2d/compatibility.h78
-rw-r--r--contrib/labs/kerneltesting/conv2d/io_accessor.cpp124
-rw-r--r--contrib/labs/kerneltesting/conv2d/io_accessor.h104
-rw-r--r--contrib/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp607
-rw-r--r--contrib/labs/kerneltesting/conv2d/optimized_ops.h339
-rw-r--r--contrib/labs/kerneltesting/conv2d/types.h146
10 files changed, 1611 insertions, 0 deletions
diff --git a/contrib/labs/kerneltesting/CMakeLists.txt b/contrib/labs/kerneltesting/CMakeLists.txt
new file mode 100644
index 000000000..5792d0fe8
--- /dev/null
+++ b/contrib/labs/kerneltesting/CMakeLists.txt
@@ -0,0 +1,19 @@
+if(NOT ${TARGET_ARCH_BASE} STREQUAL "arm")
+ return()
+endif(NOT ${TARGET_ARCH_BASE} STREQUAL "arm")
+
+nnfw_find_package(ARMCompute REQUIRED)
+
+function(add_kerneltesting TESTNAME SRC_FILES)
+ link_directories(${CMAKE_INSTALL_PREFIX}/lib)
+ add_executable(${TESTNAME} ${SRC_FILES})
+ target_include_directories(${TESTNAME} PUBLIC
+ ${NNFW_INCLUDE_DIR})
+ target_link_libraries(${TESTNAME} nnfw_lib_misc arm_compute_graph)
+ install(TARGETS ${TESTNAME} DESTINATION bin)
+endfunction()
+
+# TODO: Enable conv2d on Tizen
+if (NOT ${TARGET_OS} STREQUAL "tizen")
+ add_subdirectory(conv2d)
+endif()
diff --git a/contrib/labs/kerneltesting/conv2d/CMakeLists.txt b/contrib/labs/kerneltesting/conv2d/CMakeLists.txt
new file mode 100644
index 000000000..25e01f584
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(KERNELTESTING_CONV2D kerneltesting_conv2d)
+
+set(KERNELTESTING_CONV2D_SRCS "nnfw_conv2d_test.cpp"
+ "io_accessor.cpp")
+
+set(GEMLOWP_INCUDE ${TFLITE_DEPEND_DIR}/gemmlowp/public)
+set(EIGN_INCLUDE ${TFLITE_DEPEND_DIR}/eigen
+ ${TFLITE_DEPEND_DIR}/eigen/Eigen)
+
+add_kerneltesting(${KERNELTESTING_CONV2D} "${KERNELTESTING_CONV2D_SRCS}")
+
+target_include_directories(${KERNELTESTING_CONV2D} PUBLIC
+ ${GEMLOWP_INCUDE}
+ ${EIGN_INCLUDE}
+ )
diff --git a/contrib/labs/kerneltesting/conv2d/OperationUtils.h b/contrib/labs/kerneltesting/conv2d/OperationUtils.h
new file mode 100644
index 000000000..0beac80a4
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/OperationUtils.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H
+#define ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H
+
+#include <cstdint>
+#include <vector>
+#include <ostream>
+
+#define LOG(ERROR) std::cerr
+
+// Macro to check if the input parameters for operation are valid or not.
+#define NN_CHECK(v) \
+ do { \
+ if (!(v)) { \
+ LOG(ERROR) << "NN_CHECK failed: " << #v << "'\n"; \
+ return false; \
+ } \
+ } while(0);
+
+#define NN_CHECK_EQ(actual, expected) \
+ NN_CHECK((actual) == (expected))
+
+#define NN_OPS_CHECK NN_CHECK
+
+enum PaddingScheme {
+ kPaddingUnknown = 0,
+ kPaddingSame = 1,
+ kPaddingValid = 2,
+};
+
+enum class FusedActivationFunc : int32_t {
+ NONE = 0,
+ RELU = 1,
+ RELU1 = 2,
+ RELU6 = 3,
+};
+
+
+#define ANDROID_NN_MACRO_DISPATCH(macro) \
+ switch (activation) { \
+ case (int32_t) FusedActivationFunc::NONE: \
+ macro(kNone); \
+ break; \
+ case (int32_t) FusedActivationFunc::RELU: \
+ macro(kRelu); \
+ break; \
+ case (int32_t) FusedActivationFunc::RELU1: \
+ macro(kRelu1); \
+ break; \
+ case (int32_t) FusedActivationFunc::RELU6: \
+ macro(kRelu6); \
+ break; \
+ default: \
+ LOG(ERROR) << "Unsupported fused activation function type"; \
+ return false; \
+ }
+
+
+#endif // ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H
diff --git a/contrib/labs/kerneltesting/conv2d/common.h b/contrib/labs/kerneltesting/conv2d/common.h
new file mode 100644
index 000000000..8e675e664
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/common.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_
+#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_
+
+#ifndef USE_NEON
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define USE_NEON
+#include <arm_neon.h>
+#endif
+#endif
+
+#include <gemmlowp.h>
+#include "types.h"
+
+template <FusedActivationFunctionType Ac>
+struct ActivationFunctionImpl {};
+
+template <>
+struct ActivationFunctionImpl<FusedActivationFunctionType::kNone> {
+ static float Eval(float x) { return x; }
+};
+
+template <>
+struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu> {
+ static float Eval(float x) { return x < 0.f ? 0.f : x; }
+};
+
+template <>
+struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu1> {
+ static float Eval(float x) { return x > 1.f ? 1.f : x < -1.f ? -1.f : x; }
+};
+
+template <>
+struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu6> {
+ static float Eval(float x) { return x > 6.f ? 6.f : x < 0.f ? 0.f : x; }
+};
+
+template <FusedActivationFunctionType Ac>
+float ActivationFunction(float x) {
+ return ActivationFunctionImpl<Ac>::Eval(x);
+}
+
+inline int32 MultiplyByQuantizedMultiplierSmallerThanOne(
+ int32 x, int32 quantized_multiplier, int right_shift) {
+ using gemmlowp::RoundingDivideByPOT;
+ using gemmlowp::SaturatingRoundingDoublingHighMul;
+ return RoundingDivideByPOT(
+ SaturatingRoundingDoublingHighMul(x, quantized_multiplier), right_shift);
+}
+
+inline int32 MultiplyByQuantizedMultiplierGreaterThanOne(
+ int32 x, int32 quantized_multiplier, int left_shift) {
+ using gemmlowp::SaturatingRoundingDoublingHighMul;
+ return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
+ quantized_multiplier);
+}
+
+#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_
diff --git a/contrib/labs/kerneltesting/conv2d/compatibility.h b/contrib/labs/kerneltesting/conv2d/compatibility.h
new file mode 100644
index 000000000..db8ba04bc
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/compatibility.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_
+#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_
+
+#ifndef ANDROID_ML_NN_COMPATIBILITY
+#define ANDROID_ML_NN_COMPATIBILITY
+
+#include <cassert>
+#include <cstdint>
+
+#ifndef DCHECK
+#define DCHECK(condition) (condition) ? (void)0 : assert(false)
+#endif
+
+#ifndef DCHECK_EQ
+#define DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false)
+#endif
+
+#ifndef DCHECK_GE
+#define DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : assert(false)
+#endif
+
+#ifndef DCHECK_GT
+#define DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : assert(false)
+#endif
+
+#ifndef DCHECK_LE
+#define DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : assert(false)
+#endif
+
+#ifndef DCHECK_LT
+#define DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : assert(false)
+#endif
+
+#ifndef CHECK_EQ
+#define CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false)
+#endif
+
+using uint8 = std::uint8_t;
+using int16 = std::int16_t;
+using uint16 = std::uint16_t;
+using int32 = std::int32_t;
+using uint32 = std::uint32_t;
+
+#endif
+
+#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_
diff --git a/contrib/labs/kerneltesting/conv2d/io_accessor.cpp b/contrib/labs/kerneltesting/conv2d/io_accessor.cpp
new file mode 100644
index 000000000..6d3cd9d04
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/io_accessor.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "io_accessor.h"
+
+InputAccessor::InputAccessor(const float* inputData, const Shape& inputShape)
+ : _inputData(inputData)
+ , _inputShape(inputShape)
+{
+}
+
+WeightAccessor::WeightAccessor(const float* filterData, const Shape& filterShape)
+ : _filterData(filterData)
+ , _filterShape(filterShape)
+{
+}
+
+BiasAccessor::BiasAccessor(const float* biasData, const Shape& biasShape)
+ : _biasData(biasData)
+ , _biasShape(biasShape)
+{
+}
+
+OutputAccessor::OutputAccessor(float* outputData, const Shape& outputShape)
+ : _outputData(outputData)
+ , _outputShape(outputShape)
+{
+}
+
+bool InputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ uint32_t width = getSizeOfDimension(_inputShape, 2);
+ uint32_t offset = id.y() * width + id.x();
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_inputData + offset);
+ });
+ return true;
+}
+
+bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ uint32_t width = getSizeOfDimension(_filterShape, 2);
+ uint32_t offset = id.y() * width + id.x();
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_filterData + offset);
+ });
+ return true;
+}
+
+bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ uint32_t width = getSizeOfDimension(_biasShape, 2);
+ uint32_t offset = id.y() * width + id.x();
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_biasData + offset);
+ });
+ return true;
+}
+
+bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ uint32_t width = getSizeOfDimension(_outputShape, 2);
+ uint32_t offset = id.y() * width + id.x();
+ *(_outputData + offset) =
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id));
+ });
+ return false; // end the network
+}
diff --git a/contrib/labs/kerneltesting/conv2d/io_accessor.h b/contrib/labs/kerneltesting/conv2d/io_accessor.h
new file mode 100644
index 000000000..0201f7242
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/io_accessor.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CONV2D_IO_ACCESSOR_H__
+#define __CONV2D_IO_ACCESSOR_H__
+
+#include <arm_compute/graph/ITensorAccessor.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+#include "types.h"
+
+class InputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ InputAccessor(const float* inputData, const Shape& inputShape);
+ InputAccessor(InputAccessor&&) = default;
+
+ // Inherited methods overriden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _inputData;
+ const Shape& _inputShape;
+};
+
+class WeightAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ WeightAccessor(const float* filterData, const Shape& filterShape);
+ WeightAccessor(WeightAccessor&&) = default;
+
+ // Inherited methods overriden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _filterData;
+ const Shape& _filterShape;
+};
+
+class BiasAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ BiasAccessor(const float* biasData, const Shape& biasShape);
+ BiasAccessor(BiasAccessor&&) = default;
+
+ // Inherited methods overriden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _biasData;
+ const Shape& _biasShape;
+};
+
+class OutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ OutputAccessor(float* outputData, const Shape& outputShape);
+ OutputAccessor(OutputAccessor&&) = default;
+
+ // Inherited methods overriden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ float* _outputData;
+ const Shape& _outputShape;
+};
+
+#endif // __CONV2D_IO_ACCESSOR_H__
diff --git a/contrib/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp b/contrib/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp
new file mode 100644
index 000000000..190be016e
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/nnfw_conv2d_test.cpp
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <iostream>
+#include <vector>
+#include <cassert>
+
+#include <Eigen/Core>
+#include <gemmlowp.h>
+
+#include "types.h"
+#include "common.h"
+#include "optimized_ops.h"
+#include "OperationUtils.h"
+
+#include <arm_compute/graph.h>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/CL/functions/CLConvolution.h>
+
+#include "io_accessor.h"
+#include "misc/environment.h"
+
+static constexpr int kStaticBufferSize = 1605632;
+static char static_scratch_buffer[kStaticBufferSize];
+
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+ uint32_t height = getSizeOfDimension(inputShape, 1); \
+ uint32_t width = getSizeOfDimension(inputShape, 2); \
+ uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
+ uint32_t filterWidth = getSizeOfDimension(filterShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(outputShape, 2); \
+ uint32_t inDepth = getSizeOfDimension(inputShape, 3); \
+ \
+ uint32_t paddingHeight = (uint32_t)padding_top; \
+ uint32_t paddingWidth = (uint32_t)padding_left; \
+ \
+ Dims<4> im2colDim; \
+ im2colDim.sizes[3] = (int)getSizeOfDimension(outputShape, 0); \
+ im2colDim.sizes[2] = (int)getSizeOfDimension(outputShape, 1); \
+ im2colDim.sizes[1] = (int)getSizeOfDimension(outputShape, 2); \
+ im2colDim.sizes[0] = (int)inDepth * filterHeight * filterWidth; \
+ \
+ im2colDim.strides[0] = 1; \
+ for (int i=1; i<4; i++) { \
+ im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1]; \
+ } \
+ \
+ Type* im2colData = nullptr; \
+ int im2colByteSize = sizeof(Type); \
+ for (int i=0; i<4; i++) { \
+ im2colByteSize *= im2colDim.sizes[i]; \
+ } \
+ if (im2colByteSize <= kStaticBufferSize) { \
+ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+ } else { \
+ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+ }
+
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+ const float* filterData, const Shape& filterShape,
+ const float* biasData, const Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const Shape& outputShape) {
+
+ ANDROID_NN_CONV_PARAMETERS(float)
+
+ #define ANDROID_NN_CONV(activation) \
+ Conv<FusedActivationFunctionType::activation>( \
+ inputData, convertShapeToDims(inputShape), \
+ filterData, convertShapeToDims(filterShape), \
+ biasData, convertShapeToDims(biasShape), \
+ stride_width, stride_height, paddingWidth, paddingHeight, \
+ outputData, convertShapeToDims(outputShape), \
+ im2colData, im2colDim)
+
+ ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_CONV)
+
+ #undef ANDROID_NN_CONV
+
+ if (im2colByteSize > kStaticBufferSize) {
+ delete[] im2colData;
+ }
+ return true;
+}
+
+//-----------------------------------------------------------------------------
+
+using arm_compute::DataType;
+using arm_compute::graph::Target;
+using arm_compute::graph::TensorDescriptor;
+using arm_compute::TensorShape;
+using arm_compute::graph::frontend::InputLayer;
+using arm_compute::graph::frontend::OutputLayer;
+
+namespace acl_graph {
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+ const float* filterData, const Shape& filterShape,
+ const float* biasData, const Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const Shape& outputShape)
+{
+ // Try with simple build-run with ACL Layer
+ arm_compute::graph::frontend::Stream graph{0, "ACL_CONV2D_TEST"};
+
+ Target target_hint = nnfw::misc::get_env_int("NNFW_ACL_USENEON")
+ ? Target::NEON : Target::CL;
+
+ // Not sure about which index is which value
+ uint32_t tsi_c = getSizeOfDimension(inputShape, 0);
+ uint32_t tsi_h = getSizeOfDimension(inputShape, 1);
+ uint32_t tsi_w = getSizeOfDimension(inputShape, 2);
+ uint32_t tsi_n = getSizeOfDimension(inputShape, 3);
+
+ uint32_t tsk_h = getSizeOfDimension(filterShape, 1);
+ uint32_t tsk_w = getSizeOfDimension(filterShape, 2);
+ uint32_t tsk_n = getSizeOfDimension(filterShape, 3);
+
+ graph << target_hint
+ << InputLayer(TensorDescriptor(TensorShape(tsi_w, tsi_h, tsi_c, tsi_n), DataType::F32),
+ std::unique_ptr<InputAccessor>(new InputAccessor(inputData, inputShape)))
+ << arm_compute::graph::frontend::ConvolutionLayer(
+ tsk_w, tsk_h, tsk_n,
+ std::unique_ptr<WeightAccessor>(new WeightAccessor(filterData, filterShape)),
+ std::unique_ptr<BiasAccessor>(new BiasAccessor(biasData, biasShape)),
+ arm_compute::PadStrideInfo(stride_width, stride_height, padding_top, padding_bottom))
+ ;
+ if (activation != static_cast<int32_t>(FusedActivationFunc::NONE)) {
+ arm_compute::ActivationLayerInfo::ActivationFunction actFunc =
+ arm_compute::ActivationLayerInfo::ActivationFunction::RELU;
+
+ graph << arm_compute::graph::frontend::ActivationLayer(arm_compute::ActivationLayerInfo(actFunc));
+ // Activation does not provide output Tensor and makes next layer fail to add to graph
+ // when it's the last(output) layer. To solve this, need to add a dummy layer.
+ uint32_t tso_c = getSizeOfDimension(outputShape, 0);
+ uint32_t tso_h = getSizeOfDimension(outputShape, 1);
+ uint32_t tso_w = getSizeOfDimension(outputShape, 2);
+ uint32_t tso_n = getSizeOfDimension(outputShape, 3);
+ graph << arm_compute::graph::frontend::ReshapeLayer(TensorShape(tso_w, tso_h, tso_c, tso_n));
+ }
+ graph << OutputLayer(std::unique_ptr<OutputAccessor>(new OutputAccessor(outputData, outputShape)))
+ ;
+
+ graph.run();
+
+ return true;
+}
+
+} // namespace acl_graph
+
+//-----------------------------------------------------------------------------
+
+using arm_compute::TensorInfo;
+
+namespace acl_runtime {
+
+TensorShape calculate_convolution_layer_output_shape(
+ const arm_compute::TensorShape &input_shape,
+ const arm_compute::TensorShape &weights_shape,
+ const arm_compute::PadStrideInfo &conv_info)
+{
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
+
+ // Get output width and height
+ std::tie(output_width, output_height) =
+ arm_compute::scaled_dimensions(
+ input_shape.x(), input_shape.y(),
+ weights_shape.x(), weights_shape.y(),
+ conv_info);
+
+ // Create output shape
+ TensorShape output_shape = input_shape;
+ output_shape.set(0, output_width);
+ output_shape.set(1, output_height);
+ output_shape.set(2, weights_shape[3]);
+
+ return output_shape;
+}
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+ const float* filterData, const Shape& filterShape,
+ const float* biasData, const Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const Shape& outputShape)
+{
+ arm_compute::CLScheduler::get().default_init();
+
+ uint32_t tsi_c = getSizeOfDimension(inputShape, 0);
+ uint32_t tsi_h = getSizeOfDimension(inputShape, 1);
+ uint32_t tsi_w = getSizeOfDimension(inputShape, 2);
+ uint32_t tsi_n = getSizeOfDimension(inputShape, 3);
+
+ uint32_t tsk_h = getSizeOfDimension(filterShape, 1);
+ uint32_t tsk_w = getSizeOfDimension(filterShape, 2);
+ uint32_t tsk_n = getSizeOfDimension(filterShape, 3);
+
+ TensorShape input_shape = TensorShape(tsi_w, tsi_h, tsi_c, tsi_n);
+ TensorShape filter_shape = TensorShape(tsi_w, tsi_h, tsi_c, tsi_n);
+ arm_compute::PadStrideInfo conv_info =
+ arm_compute::PadStrideInfo(stride_width, stride_height, padding_top, padding_bottom);
+
+ TensorShape output_shape = calculate_convolution_layer_output_shape(
+ input_shape, filter_shape, conv_info);
+
+ uint32_t tso_c = output_shape[0];
+ uint32_t tso_w = output_shape[1];
+ uint32_t tso_h = output_shape[2];
+ uint32_t tso_n = output_shape[3];
+
+ arm_compute::CLTensor input, output, bias, filter;
+
+ input.allocator()->init(TensorInfo(tsi_w, tsi_h, arm_compute::Format::F32));
+ output.allocator()->init(TensorInfo(tso_w, tso_h, arm_compute::Format::F32));
+ bias.allocator()->init(TensorInfo(tso_w, tso_h, arm_compute::Format::F32));
+ filter.allocator()->init(TensorInfo(tsk_w, tsk_h, arm_compute::Format::F32));
+
+ input.allocator()->allocate();
+ output.allocator()->allocate();
+ bias.allocator()->allocate();
+ filter.allocator()->allocate();
+
+ input.map();
+ InputAccessor ia(inputData, inputShape);
+ ia.access_tensor(input);
+ input.unmap();
+
+ bias.map();
+ BiasAccessor ba(biasData, biasShape);
+ ba.access_tensor(bias);
+ bias.unmap();
+
+ filter.map();
+ WeightAccessor fa(filterData, filterShape);
+ fa.access_tensor(filter);
+ filter.unmap();
+
+ arm_compute::CLConvolutionLayer conv_f;
+ conv_f.configure(&input, &filter, &bias, &output, conv_info);
+
+ arm_compute::CLScheduler::get().sync();
+
+ conv_f.run();
+
+ output.map();
+ OutputAccessor oa(outputData, outputShape);
+ oa.access_tensor(output);
+ output.unmap();
+
+ return true;
+}
+
+} // namespace acl_runtime
+
+//-----------------------------------------------------------------------------
+
+enum COMPUTE_TYPE {
+ COMPUTE_DEFAULT = 0,
+ COMPUTE_ACLGRAPH,
+ COMPUTE_ACLRT
+};
+
+bool convFloat32(const float* inputData, const Shape& inputShape,
+ const float* filterData, const Shape& filterShape,
+ const float* biasData, const Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const Shape& outputShape,
+ COMPUTE_TYPE compType) {
+
+ switch (compType)
+ {
+ case COMPUTE_DEFAULT :
+ return convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+
+ case COMPUTE_ACLGRAPH :
+ return acl_graph::convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+
+ case COMPUTE_ACLRT :
+ return acl_runtime::convFloat32(inputData, inputShape, filterData, filterShape,
+ biasData, biasShape, padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height,
+ activation, outputData, outputShape);
+ }
+ return false;
+}
+
+//-----------------------------------------------------------------------------
+
+void dumpData(const char* name, const float* data, const Shape& shape)
+{
+ uint32_t height = getSizeOfDimension(shape, 1);
+ uint32_t width = getSizeOfDimension(shape, 2);
+
+ std::cout << "---" << name << "---" << std::endl;
+ for (int h = 0; h < height; h++) {
+ std::cout << "H=" << h << " | ";
+ for (int w = 0; w < width; w++) {
+ std::cout << data[h * width + w] << ",";
+ }
+ std::cout << std::endl;
+ }
+}
+
+void initData(float* outputData, int num, float value)
+{
+ for (int i = 0; i < num; i++) {
+ *(outputData + i) = value;
+ }
+}
+
+void initDataSeq(float* outputData, int num, float value)
+{
+ for (int i = 0; i < num; i++) {
+ *(outputData + i) = value;
+ value += 1.0;
+ }
+}
+
+// compareData
+// return true if result == expected with the shape info,
+// otherwise false
+bool compareData(const float* result, const float* expected, const Shape& shape)
+{
+ NN_CHECK_EQ(shape.dimensions.size(), 4);
+
+ uint32_t height = getSizeOfDimension(shape, 1);
+ uint32_t width = getSizeOfDimension(shape, 2);
+ uint32_t numitems = height * width;
+ for (int item = 0; item < numitems; item++) {
+ if (*(result + item) != *(expected + item)) {
+ LOG(ERROR) << "compareData failed: result " << *(result + item)
+ << ", expected " << *(expected + item) << std::endl;
+ return false;
+ }
+ }
+ return true;
+}
+
+int test_3x3_1x1_one(COMPUTE_TYPE comptype)
+{
+ float inputData[9];
+ const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const Shape biasShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float* outputData = new float[9];
+ const Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ float* expectData = new float[9];
+ bool bret;
+
+ initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+ initData(expectData, sizeof(expectData) / sizeof(expectData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ expectData, outputShape,
+ COMPUTE_DEFAULT);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape,
+ comptype);
+
+ dumpData("Input ", inputData, inputShape);
+ dumpData("Filter ", filterData, filterShape);
+ dumpData("Bias ", biasData, biasShape);
+ dumpData("Output ", outputData, outputShape);
+ std::cout << std::endl;
+
+ bret = compareData(outputData, expectData, outputShape);
+
+ delete outputData;
+ delete expectData;
+
+ if (!bret)
+ {
+ LOG(ERROR) << "TEST FAILED " << __FUNCTION__ << std::endl;
+ return -1;
+ }
+ return 0;
+}
+
+int test_3x3_3x3_one(void)
+{
+ float inputData[9];
+ const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const Shape biasShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float* outputData = new float[9];
+ const Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float* expectData = new float[9];
+ bool bret;
+
+ initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+ initData(expectData, sizeof(expectData) / sizeof(expectData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ expectData, outputShape,
+ COMPUTE_DEFAULT);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape,
+ COMPUTE_ACLGRAPH);
+
+ dumpData("Input ", inputData, inputShape);
+ dumpData("Filter ", filterData, filterShape);
+ dumpData("Bias ", biasData, biasShape);
+ dumpData("Output ", outputData, outputShape);
+ std::cout << std::endl;
+
+ bret = compareData(outputData, expectData, outputShape);
+
+ delete outputData;
+ delete expectData;
+
+ if (!bret)
+ {
+ LOG(ERROR) << "TEST FAILED " << __FUNCTION__ << std::endl;
+ return -1;
+ }
+ return 0;
+}
+
+int test_3x3_3x3_seq(void)
+{
+ float inputData[9];
+ const Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const Shape biasShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float* outputData = new float[9];
+ const Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float* expectData = new float[9];
+ bool bret;
+
+ initDataSeq(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ initDataSeq(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ initDataSeq(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+ initData(expectData, sizeof(expectData) / sizeof(expectData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ expectData, outputShape,
+ COMPUTE_DEFAULT);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape,
+ COMPUTE_ACLGRAPH);
+
+ dumpData("Input ", inputData, inputShape);
+ dumpData("Filter ", filterData, filterShape);
+ dumpData("Bias ", biasData, biasShape);
+ dumpData("Output ", outputData, outputShape);
+ std::cout << std::endl;
+
+ bret = compareData(outputData, expectData, outputShape);
+
+ delete outputData;
+ delete expectData;
+
+ if (!bret)
+ {
+ LOG(ERROR) << "TEST FAILED " << __FUNCTION__ << std::endl;
+ return -1;
+ }
+ return 0;
+}
+
+int main(int argc, char* argv[])
+{
+ int result;
+
+ // input 3x3, output 1x1, all data 1.0
+ result = test_3x3_1x1_one(COMPUTE_ACLGRAPH);
+ if (result) return result;
+ result = test_3x3_1x1_one(COMPUTE_ACLRT);
+ if (result) return result;
+
+ // input 3x3, output 3x3, all data 1.0
+ result = test_3x3_3x3_one();
+ if (result) return result;
+
+ result = test_3x3_3x3_seq();
+ if (result) return result;
+
+ return result;
+}
diff --git a/contrib/labs/kerneltesting/conv2d/optimized_ops.h b/contrib/labs/kerneltesting/conv2d/optimized_ops.h
new file mode 100644
index 000000000..1d8c4ff28
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/optimized_ops.h
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_
+#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_
+
+// Make a local VectorMap typedef allowing to map a float array
+// as a Eigen matrix expression. The same explanation as for VectorMap
+// above also applies here.
+template <typename Scalar>
+using MatrixMap = typename std::conditional<
+ std::is_const<Scalar>::value,
+ Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type,
+ Eigen::Dynamic, Eigen::Dynamic>>,
+ Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
+
+template <typename Scalar, int N>
+MatrixMap<Scalar> MapAsMatrixWithFirstDimAsRows(Scalar* data,
+ const Dims<N>& dims) {
+ const int rows = dims.sizes[0];
+ int cols = 1;
+ for (int d = 1; d < N; d++) {
+ cols *= dims.sizes[d];
+ }
+ return MatrixMap<Scalar>(data, rows, cols);
+}
+
+template <typename Scalar, int N>
+MatrixMap<Scalar> MapAsMatrixWithLastDimAsCols(Scalar* data,
+ const Dims<N>& dims) {
+ const int cols = dims.sizes[N - 1];
+ int rows = 1;
+ for (int d = 0; d < N - 1; d++) {
+ rows *= dims.sizes[d];
+ }
+ return MatrixMap<Scalar>(data, rows, cols);
+}
+
+template <typename T>
+inline void ExtractPatchIntoBufferColumn(
+ const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth,
+ int stride_width, int stride_height, int pad_width, int pad_height,
+ int in_width, int in_height, int in_depth, int single_buffer_length,
+ int buffer_id, const T* in_data, T* conv_buffer_data, uint8 byte_zero) {
+ gemmlowp::ScopedProfilingLabel label("ExtractPatchIntoBufferColumn");
+ // This chunk of code reshapes all the inputs corresponding to
+ // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
+ const int kwidth_times_indepth = kwidth * in_depth;
+ const int inwidth_times_indepth = in_width * in_depth;
+ const int ih_ungated_start = h * stride_height - pad_height;
+ const int ih_ungated_end = (ih_ungated_start + kheight);
+ const int ih_end = std::min(ih_ungated_end, in_height);
+ const int iw_ungated_start = w * stride_width - pad_width;
+ const int iw_ungated_end = (iw_ungated_start + kwidth);
+ const int iw_end = std::min(iw_ungated_end, in_width);
+ // If the patch is off the edge of the input image, skip writing those rows
+ // and columns from the patch into the output array.
+ const int h_offset = std::max(0, -ih_ungated_start);
+ const int w_offset = std::max(0, -iw_ungated_start);
+ const int ih_start = std::max(0, ih_ungated_start);
+ const int iw_start = std::max(0, iw_ungated_start);
+ const int single_row_num =
+ std::min(kwidth - w_offset, in_width - iw_start) * in_depth;
+ const int output_row_offset = (buffer_id * single_buffer_length);
+ int out_offset =
+ output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
+ int in_offset = Offset(input_dims, 0, iw_start, ih_start, b);
+
+ // Express all of the calculations as padding around the input patch.
+ const int top_padding = h_offset;
+ const int bottom_padding = (ih_ungated_end - ih_end);
+ const int left_padding = w_offset;
+ const int right_padding = (iw_ungated_end - iw_end);
+ assert(single_row_num ==
+ ((kwidth - (left_padding + right_padding)) * in_depth));
+
+ // Write out zeroes to the elements representing the top rows of the input
+ // patch that are off the edge of the input image.
+ if (top_padding > 0) {
+ const int top_row_elements = (top_padding * kwidth * in_depth);
+ memset(conv_buffer_data + output_row_offset, byte_zero,
+ (top_row_elements * sizeof(T)));
+ }
+
+ // If the patch is on the interior of the input image horizontally, just copy
+ // over the rows sequentially, otherwise add zero padding at the start or end.
+ if ((left_padding == 0) && (right_padding == 0)) {
+ for (int ih = ih_start; ih < ih_end; ++ih) {
+ memcpy(conv_buffer_data + out_offset, in_data + in_offset,
+ single_row_num * sizeof(T));
+ out_offset += kwidth_times_indepth;
+ in_offset += inwidth_times_indepth;
+ }
+ } else {
+ for (int ih = ih_start; ih < ih_end; ++ih) {
+ if (left_padding > 0) {
+ const int left_start = (out_offset - (left_padding * in_depth));
+ memset(conv_buffer_data + left_start, byte_zero,
+ (left_padding * in_depth * sizeof(T)));
+ }
+ memcpy(conv_buffer_data + out_offset, in_data + in_offset,
+ single_row_num * sizeof(T));
+ if (right_padding > 0) {
+ const int right_start = (out_offset + single_row_num);
+ memset(conv_buffer_data + right_start, byte_zero,
+ (right_padding * in_depth * sizeof(T)));
+ }
+ out_offset += kwidth_times_indepth;
+ in_offset += inwidth_times_indepth;
+ }
+ }
+
+ // If the bottom of the patch falls off the input image, pad the values
+ // representing those input rows with zeroes.
+ if (bottom_padding > 0) {
+ const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
+ const int bottom_start =
+ output_row_offset +
+ ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
+ memset(conv_buffer_data + bottom_start, byte_zero,
+ (bottom_row_elements * sizeof(T)));
+ }
+}
+
+#ifdef USE_NEON
+template <FusedActivationFunctionType Ac>
+void AddBiasAndEvalActivationFunction(const float* bias_data,
+ const Dims<4>& bias_dims,
+ float* array_data,
+ const Dims<4>& array_dims) {
+ gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction");
+ const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3];
+ const int array_size = array_dims.sizes[3] * array_dims.strides[3];
+ DCHECK_EQ((array_size % bias_size), 0);
+ float* array_ptr = array_data;
+ float* array_end_ptr = array_ptr + array_size;
+ const auto zero = vdupq_n_f32(0);
+ const auto six = vdupq_n_f32(6);
+ const auto neg_one = vdupq_n_f32(-1);
+ const auto one = vdupq_n_f32(1);
+ for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
+ int i = 0;
+ for (; i <= bias_size - 16; i += 16) {
+ auto b0 = vld1q_f32(bias_data + i);
+ auto b1 = vld1q_f32(bias_data + i + 4);
+ auto b2 = vld1q_f32(bias_data + i + 8);
+ auto b3 = vld1q_f32(bias_data + i + 12);
+ auto a0 = vld1q_f32(array_ptr + i);
+ auto a1 = vld1q_f32(array_ptr + i + 4);
+ auto a2 = vld1q_f32(array_ptr + i + 8);
+ auto a3 = vld1q_f32(array_ptr + i + 12);
+ auto x0 = vaddq_f32(a0, b0);
+ auto x1 = vaddq_f32(a1, b1);
+ auto x2 = vaddq_f32(a2, b2);
+ auto x3 = vaddq_f32(a3, b3);
+ if (Ac == FusedActivationFunctionType::kRelu ||
+ Ac == FusedActivationFunctionType::kRelu6) {
+ x0 = vmaxq_f32(zero, x0);
+ x1 = vmaxq_f32(zero, x1);
+ x2 = vmaxq_f32(zero, x2);
+ x3 = vmaxq_f32(zero, x3);
+ if (Ac == FusedActivationFunctionType::kRelu6) {
+ x0 = vminq_f32(six, x0);
+ x1 = vminq_f32(six, x1);
+ x2 = vminq_f32(six, x2);
+ x3 = vminq_f32(six, x3);
+ }
+ } else if (Ac == FusedActivationFunctionType::kRelu1) {
+ x0 = vmaxq_f32(neg_one, x0);
+ x1 = vmaxq_f32(neg_one, x1);
+ x2 = vmaxq_f32(neg_one, x2);
+ x3 = vmaxq_f32(neg_one, x3);
+ x0 = vminq_f32(one, x0);
+ x1 = vminq_f32(one, x1);
+ x2 = vminq_f32(one, x2);
+ x3 = vminq_f32(one, x3);
+ }
+ vst1q_f32(array_ptr + i, x0);
+ vst1q_f32(array_ptr + i + 4, x1);
+ vst1q_f32(array_ptr + i + 8, x2);
+ vst1q_f32(array_ptr + i + 12, x3);
+ }
+ for (; i <= bias_size - 4; i += 4) {
+ auto b = vld1q_f32(bias_data + i);
+ auto a = vld1q_f32(array_ptr + i);
+ auto x = vaddq_f32(a, b);
+ if (Ac == FusedActivationFunctionType::kRelu ||
+ Ac == FusedActivationFunctionType::kRelu6) {
+ x = vmaxq_f32(zero, x);
+ if (Ac == FusedActivationFunctionType::kRelu6) {
+ x = vminq_f32(six, x);
+ }
+ } else if (Ac == FusedActivationFunctionType::kRelu1) {
+ x = vmaxq_f32(neg_one, x);
+ x = vminq_f32(one, x);
+ }
+ vst1q_f32(array_ptr + i, x);
+ }
+ for (; i < bias_size; i++) {
+ array_ptr[i] = ActivationFunction<Ac>(array_ptr[i] + bias_data[i]);
+ }
+ }
+}
+#else // not NEON
+template <FusedActivationFunctionType Ac>
+void AddBiasAndEvalActivationFunction(const float* bias_data,
+ const Dims<4>& bias_dims,
+ float* array_data,
+ const Dims<4>& array_dims) {
+ gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction");
+ const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3];
+ const int array_size = array_dims.sizes[3] * array_dims.strides[3];
+ DCHECK_EQ((array_size % bias_size), 0);
+ for (int array_offset = 0; array_offset < array_size;
+ array_offset += bias_size) {
+ for (int i = 0; i < bias_size; i++) {
+ array_data[array_offset + i] =
+ ActivationFunction<Ac>(array_data[array_offset + i] + bias_data[i]);
+ }
+ }
+}
+#endif
+
+template <typename Lhs, typename Rhs, typename Result>
+void Gemm(const Eigen::MatrixBase<Lhs>& lhs, const Eigen::MatrixBase<Rhs>& rhs,
+ Eigen::MatrixBase<Result>* result) {
+ if (rhs.cols() == 1) {
+ gemmlowp::ScopedProfilingLabel label("GEMV");
+ result->col(0).noalias() = lhs * rhs.col(0);
+ } else {
+ gemmlowp::ScopedProfilingLabel label("GEMM");
+ result->noalias() = lhs * rhs;
+ }
+}
+
+template <typename T>
+void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
+ int stride_height, int pad_width, int pad_height, int kheight,
+ int kwidth, uint8 byte_zero, T* output_data,
+ const Dims<4>& output_dims) {
+ gemmlowp::ScopedProfilingLabel label("Im2col");
+ DCHECK(IsPackedWithoutStrides(input_dims));
+ DCHECK(IsPackedWithoutStrides(output_dims));
+ const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+ const int input_depth = ArraySize(input_dims, 0);
+ const int input_width = ArraySize(input_dims, 1);
+ const int input_height = ArraySize(input_dims, 2);
+ const int output_depth = ArraySize(output_dims, 0);
+ const int output_width = ArraySize(output_dims, 1);
+ const int output_height = ArraySize(output_dims, 2);
+
+ int buffer_id = 0;
+ // Loop over the output nodes.
+ for (int b = 0; b < batches; ++b) {
+ for (int h = 0; h < output_height; ++h) {
+ for (int w = 0; w < output_width; ++w) {
+ ExtractPatchIntoBufferColumn(
+ input_dims, w, h, b, kheight, kwidth, stride_width, stride_height,
+ pad_width, pad_height, input_width, input_height, input_depth,
+ output_depth, buffer_id, input_data, output_data, byte_zero);
+ ++buffer_id;
+ }
+ }
+ }
+}
+
+template <FusedActivationFunctionType Ac>
+void Conv(const float* input_data, const Dims<4>& input_dims,
+ const float* filter_data, const Dims<4>& filter_dims,
+ const float* bias_data, const Dims<4>& bias_dims, int stride_width,
+ int stride_height, int pad_width, int pad_height, float* output_data,
+ const Dims<4>& output_dims, float* im2col_data,
+ const Dims<4>& im2col_dims) {
+ (void)im2col_data;
+ (void)im2col_dims;
+ gemmlowp::ScopedProfilingLabel label("Conv");
+
+ const float* gemm_input_data = nullptr;
+ const Dims<4>* gemm_input_dims = nullptr;
+ const int filter_width = ArraySize(filter_dims, 1);
+ const int filter_height = ArraySize(filter_dims, 2);
+ const bool need_im2col = stride_width != 1 || stride_height != 1 ||
+ filter_width != 1 || filter_height != 1;
+ if (need_im2col) {
+ DCHECK(im2col_data);
+ Im2col(input_data, input_dims, stride_width, stride_height, pad_width,
+ pad_height, filter_height, filter_width, 0, im2col_data,
+ im2col_dims);
+ gemm_input_data = im2col_data;
+ gemm_input_dims = &im2col_dims;
+ } else {
+ DCHECK(!im2col_data);
+ gemm_input_data = input_data;
+ gemm_input_dims = &input_dims;
+ }
+
+ const auto im2col_matrix_map =
+ MapAsMatrixWithFirstDimAsRows(gemm_input_data, *gemm_input_dims);
+ const auto filter_matrix_map =
+ MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
+ auto output_matrix_map =
+ MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+
+ Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
+
+ AddBiasAndEvalActivationFunction<Ac>(bias_data, bias_dims, output_data,
+ output_dims);
+}
+
+#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_
diff --git a/contrib/labs/kerneltesting/conv2d/types.h b/contrib/labs/kerneltesting/conv2d/types.h
new file mode 100644
index 000000000..3d09457c7
--- /dev/null
+++ b/contrib/labs/kerneltesting/conv2d/types.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_
+#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_
+
+enum class OperandType : int32_t {
+ FLOAT32 = 0,
+ INT32 = 1,
+ UINT32 = 2,
+ TENSOR_FLOAT32 = 3,
+ TENSOR_INT32 = 4,
+ TENSOR_QUANT8_ASYMM = 5,
+ OEM = 10000,
+ TENSOR_OEM_BYTE = 10001,
+};
+
+#include "compatibility.h"
+
+enum class FusedActivationFunctionType { kNone, kRelu6, kRelu1, kRelu };
+
+template <int N>
+struct Dims {
+ int sizes[N];
+ int strides[N];
+};
+
+// The type and dimensions of an operand.
+struct Shape {
+ OperandType type;
+ std::vector<uint32_t> dimensions;
+ float scale;
+ int32_t offset;
+};
+
+inline uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) {
+ if (dimensionIdx >= shape.dimensions.size()) {
+ // TODO, log the error
+ return 0;
+ }
+ return shape.dimensions[dimensionIdx];
+}
+
+inline Dims<4> convertShapeToDims(const Shape& shape) {
+ Dims<4> dims;
+ for (int i=0; i<4; i++) {
+ dims.sizes[i] = 1;
+ }
+
+ if (shape.dimensions.size() == 1) {
+ dims.sizes[0] = (int)getSizeOfDimension(shape, 0);
+ } else {
+ for (int i=0; i<4; i++) {
+ int src = (int)shape.dimensions.size()-i-1;
+ if (src >= 0) {
+ dims.sizes[i] = (int)getSizeOfDimension(shape, src);
+ }
+ }
+ }
+
+ dims.strides[0] = 1;
+ for (int i = 1; i<4; i++) {
+ dims.strides[i] = dims.strides[i-1] * dims.sizes[i-1];
+ }
+ return dims;
+}
+
+inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) {
+ DCHECK(i0 >= 0 && i0 < dims.sizes[0]);
+ DCHECK(i1 >= 0 && i1 < dims.sizes[1]);
+ DCHECK(i2 >= 0 && i2 < dims.sizes[2]);
+ DCHECK(i3 >= 0 && i3 < dims.sizes[3]);
+ return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] +
+ i3 * dims.strides[3];
+}
+
+// Get array size, DCHECKing that the dim index is in range.
+template <int N>
+int ArraySize(const Dims<N>& array, int index) {
+ DCHECK(index >= 0 && index < N);
+ return array.sizes[index];
+}
+
+// Get common array size, DCHECKing that they all agree.
+template <typename ArrayType1, typename ArrayType2>
+int MatchingArraySize(const ArrayType1& array1, int index1,
+ const ArrayType2& array2, int index2) {
+ DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2));
+ return ArraySize(array1, index1);
+}
+
+template <typename ArrayType1, typename ArrayType2, typename... Args>
+int MatchingArraySize(const ArrayType1& array1, int index1,
+ const ArrayType2& array2, int index2, Args... args) {
+ DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2));
+ return MatchingArraySize(array1, index1, args...);
+}
+
+inline int RequiredBufferSizeForDims(const Dims<4>& dims) {
+ int max_offset = 0;
+ for (int i = 0; i < 4; i++) {
+ max_offset += (dims.sizes[i] - 1) * dims.strides[i];
+ }
+ return max_offset + 1;
+}
+
+template <int N>
+bool IsPackedWithoutStrides(const Dims<N>& dims) {
+ int expected_stride = 1;
+ for (int d = 0; d < N; d++) {
+ if (dims.strides[d] != expected_stride) return false;
+ expected_stride *= dims.sizes[d];
+ }
+ return true;
+}
+
+#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_