summaryrefslogtreecommitdiff
path: root/compiler/circle-execution-plan
diff options
context:
space:
mode:
authorChunseok Lee <chunseok.lee@samsung.com>2022-04-15 19:15:11 +0900
committerChunseok Lee <chunseok.lee@samsung.com>2022-04-15 19:15:11 +0900
commit3ad689f0803519e343c36d5700646e86059df961 (patch)
tree862346c401a5577518fa7f042532aa931b53aa0e /compiler/circle-execution-plan
parentac6e4dd7b480e83b586ef533d7b29a8a97eb48fe (diff)
downloadnnfw-3ad689f0803519e343c36d5700646e86059df961.tar.gz
nnfw-3ad689f0803519e343c36d5700646e86059df961.tar.bz2
nnfw-3ad689f0803519e343c36d5700646e86059df961.zip
Imported Upstream version 1.20.0upstream/1.20.0submit/tizen/20220415.103159
Diffstat (limited to 'compiler/circle-execution-plan')
-rw-r--r--compiler/circle-execution-plan/CMakeLists.txt6
-rw-r--r--compiler/circle-execution-plan/README.md5
-rw-r--r--compiler/circle-execution-plan/pal/IScratchpadHelper.h51
-rw-r--r--compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h187
-rw-r--r--compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h137
-rw-r--r--compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h88
-rw-r--r--compiler/circle-execution-plan/pal/TargetPlatform.h38
-rw-r--r--compiler/circle-execution-plan/src/CircleExecutionPlan.cpp47
-rw-r--r--compiler/circle-execution-plan/src/ExecutionPlanner.cpp174
-rw-r--r--compiler/circle-execution-plan/src/ExecutionPlanner.h67
10 files changed, 709 insertions, 91 deletions
diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt
index 115d24860..2f657c171 100644
--- a/compiler/circle-execution-plan/CMakeLists.txt
+++ b/compiler/circle-execution-plan/CMakeLists.txt
@@ -1,4 +1,9 @@
set(SOURCES
+ pal/IScratchpadHelper.h
+ pal/ScratchpadHelperLinux.h
+ pal/ScratchpadHelperMCU.h
+ pal/ScratchpadHelperCMSISNN.h
+ pal/TargetPlatform.h
src/CircleExecutionPlan.cpp
src/ExecutionPlanner.cpp
src/ExecutionPlanner.h
@@ -13,4 +18,5 @@ target_link_libraries(circle_execution_plan luci_export)
target_link_libraries(circle_execution_plan luci_plan)
target_link_libraries(circle_execution_plan arser)
+target_include_directories(circle_execution_plan PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/pal")
install(TARGETS circle_execution_plan DESTINATION bin)
diff --git a/compiler/circle-execution-plan/README.md b/compiler/circle-execution-plan/README.md
index e789a55db..dbb7d4f85 100644
--- a/compiler/circle-execution-plan/README.md
+++ b/compiler/circle-execution-plan/README.md
@@ -10,13 +10,12 @@ The output circle file contains plan (`CircleNodeMemoryPlan`) information for ev
- number which determines order in which nodes will be executed
- memory offsets for node output tensors from the beginning of shared memory buffer
-In order to record and read this metadata, we use `CircleImportMetadata` and `CircleExportMetadata`.
-For this purpose we use `std::map<uint32_t, std::vector<uint32_t>> _memory_plan_table` which for each node with key ID contains encoded `CircleNodeMemoryPlan` data.
+In order to record and read this data, we use `luci::CircleNodeExecutionPlan`.
### Execution plan building
In order to build "execution plan" we use `ExecutionPlanner` class.
-The main method is `get_execution_plan()` which for each node finds and writes to its annotations
+The main method is `make_execution_plan()` which for each node finds and writes to its annotations
"execution plan". For this purpose there are two steps:
- determining the order of execution of nodes, which is stored in `_ordered_nodes` vector.
Now for this purpose there is only one default method `get_default_execution_order_plan()` that uses `loco::postorder_traversal(const std::vector<loco::Node *> &roots)`.
diff --git a/compiler/circle-execution-plan/pal/IScratchpadHelper.h b/compiler/circle-execution-plan/pal/IScratchpadHelper.h
new file mode 100644
index 000000000..f5a991526
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/IScratchpadHelper.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H
+#define CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H
+
+#include <luci/IR/Nodes/CircleAveragePool2D.h>
+#include <luci/IR/Nodes/CircleBatchMatMul.h>
+#include <luci/IR/Nodes/CircleConv2D.h>
+#include <luci/IR/Nodes/CircleDepthwiseConv2D.h>
+#include <luci/IR/Nodes/CircleSVDF.h>
+#include <cstdint>
+
+namespace circle_planner
+{
+
+// Interface of the platform-specific scratchpad helpers. Each method takes one operator node
+// and reports the size(s) of the scratchpad tensor(s) to plan for it. The helper
+// implementations that accompany this header build the values from element counts multiplied
+// by sizeof(...) / loco::size(...), so the results appear to be sizes in bytes.
+class IScratchpadHelper
+{
+public:
+ // Scratchpad size for an AveragePool2D node (single value).
+ virtual uint32_t
+ ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) = 0;
+
+ // Scratchpad sizes for a BatchMatMul node; a vector because an implementation may report
+ // more than one scratchpad for the same node.
+ virtual std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) = 0;
+
+ // Scratchpad size for a Conv2D node (single value).
+ virtual uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) = 0;
+
+ // Scratchpad size for a DepthwiseConv2D node (single value).
+ virtual uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) = 0;
+
+ // Scratchpad sizes for an SVDF node; a vector because an implementation may report more
+ // than one scratchpad for the same node.
+ virtual std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) = 0;
+
+ // Virtual so that a helper can be destroyed through a pointer to this interface.
+ virtual ~IScratchpadHelper() = default;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h
new file mode 100644
index 000000000..5369c0937
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
+
+#include "IScratchpadHelper.h"
+#include <cassert>
+
+namespace circle_planner
+{
+
+namespace
+{
+
+// Computes the padding applied at one side of a dimension: half (rounded towards zero) of
+// what the output extent needs beyond the input extent, never less than zero.
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+                              int32_t filter_size, int32_t out_size)
+{
+  // Extent covered by the filter once the dilation gaps are included
+  const int32_t dilated_filter_size = (filter_size - 1) * dilation_rate + 1;
+  // Extent required to produce out_size outputs, minus what the input already provides
+  const int32_t missing_total = (out_size - 1) * stride + dilated_filter_size - in_size;
+  const int32_t missing_per_side = missing_total / 2;
+
+  if (missing_per_side <= 0)
+    return 0;
+
+  return missing_per_side;
+}
+
+} // namespace
+
+/**
+ * @brief Scratchpad helper for the cmsisnn platform
+ *
+ * Each method follows the CMSIS-NN buffer size function named in its body comment.
+ * Sizes are element counts multiplied by sizeof(element type).
+ */
+class ScratchpadHelperCMSISNN : public IScratchpadHelper
+{
+public:
+  /// @param use_dsp when false, AveragePool2D, Conv2D and DepthwiseConv2D get no scratchpad
+  explicit ScratchpadHelperCMSISNN(bool use_dsp) : _use_dsp(use_dsp)
+  {
+    // Do nothing
+  }
+
+  /// @return depth * sizeof(int32_t) for S8 input with DSP enabled, 0 otherwise
+  uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+  {
+    // Main logic of arm_avgpool_s8_get_buffer_size
+
+    const auto avg_pool_input = loco::must_cast<luci::CircleNode *>(avg_pool->value());
+
+    if (avg_pool_input->dtype() != loco::DataType::S8 or !_use_dsp)
+      return 0;
+
+    const auto depth = static_cast<int32_t>(avg_pool_input->dim(3).value());
+
+    return depth * sizeof(int32_t);
+  }
+
+  /// @throws std::runtime_error always: BatchMatMul is not supported for this platform
+  std::vector<uint32_t> ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *) final
+  {
+    throw std::runtime_error("BatchMatMul is not currently supported for cmsisnn platform");
+  }
+
+  /**
+   * @return 0 when the convolution is dilated, when the input is not S8, when it is an
+   *         unpadded 1x1 stride-1 convolution with input depth divisible by 4, or when DSP
+   *         is disabled; otherwise 2 * input_depth * filter_width * filter_height int16_t
+   *         elements
+   */
+  uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final
+  {
+    // Main logic of arm_convolve_wrapper_s8_get_buffer_size
+
+    const auto dilation_height_factor = static_cast<int32_t>(conv->dilation()->h());
+    const auto dilation_width_factor = static_cast<int32_t>(conv->dilation()->w());
+
+    const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
+    const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
+
+    if (dilation_width_factor != 1 or dilation_height_factor != 1 or
+        conv_input->dtype() != loco::DataType::S8)
+    {
+      return 0;
+    }
+
+    const auto input_depth = static_cast<int32_t>(conv_input->dim(3).value());
+
+    const auto input_height = static_cast<int32_t>(conv_input->dim(1).value());
+    const auto input_width = static_cast<int32_t>(conv_input->dim(2).value());
+
+    const auto filter_height = static_cast<int32_t>(filter->dim(1).value());
+    const auto filter_width = static_cast<int32_t>(filter->dim(2).value());
+
+    const auto stride_height = static_cast<int32_t>(conv->stride()->h());
+    const auto stride_width = static_cast<int32_t>(conv->stride()->w());
+
+    const auto output_height = static_cast<int32_t>(conv->dim(1).value());
+    const auto output_width = static_cast<int32_t>(conv->dim(2).value());
+
+    assert(conv_input->quantparam()->zerop.size() == 1);
+    assert(conv->quantparam()->zerop.size() == 1);
+
+    const auto padding_height = computePadding(stride_height, dilation_height_factor, input_height,
+                                               filter_height, output_height);
+    const auto padding_width =
+      computePadding(stride_width, dilation_width_factor, input_width, filter_width, output_width);
+
+    // Unpadded 1x1 stride-1 convolution with depth divisible by 4: no scratchpad
+    if ((padding_width == 0) && (padding_height == 0) && (input_depth % 4 == 0) &&
+        (stride_width == 1) && (stride_height == 1) && (filter_width == 1) && (filter_height == 1))
+    {
+      return 0;
+    }
+
+    if (_use_dsp)
+    {
+      return (2 * input_depth * filter_width * filter_height) * sizeof(int16_t);
+    }
+
+    return 0;
+  }
+
+  /**
+   * @return input_depth * filter_height * filter_width int16_t elements when the input is
+   *         S8, the convolution is not dilated, input and output depth are equal, the batch
+   *         size is 1 and DSP is enabled; 0 otherwise
+   */
+  uint32_t
+  ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+  {
+    // Main logic of arm_depthwise_conv_wrapper_s8_get_buffer_size
+
+    const auto dilation_height_factor = static_cast<int32_t>(depthwise_conv->dilation()->h());
+    const auto dilation_width_factor = static_cast<int32_t>(depthwise_conv->dilation()->w());
+
+    const auto depthwise_conv_input = loco::must_cast<luci::CircleNode *>(depthwise_conv->input());
+    const auto filter = loco::must_cast<luci::CircleNode *>(depthwise_conv->filter());
+
+    if (dilation_width_factor != 1 or dilation_height_factor != 1 or
+        depthwise_conv_input->dtype() != loco::DataType::S8)
+    {
+      return 0;
+    }
+
+    const auto input_depth = static_cast<int32_t>(depthwise_conv_input->dim(3).value());
+    const auto output_depth = static_cast<int32_t>(depthwise_conv->dim(3).value());
+    const auto batch_size = static_cast<int32_t>(depthwise_conv_input->dim(0).value());
+
+    if (input_depth != output_depth or batch_size != 1 or !_use_dsp)
+      return 0;
+
+    const auto filter_height = static_cast<int32_t>(filter->dim(1).value());
+    const auto filter_width = static_cast<int32_t>(filter->dim(2).value());
+
+    return input_depth * filter_height * filter_width * sizeof(int16_t);
+  }
+
+  /**
+   * @return for S8 input two sizes (batch_size * num_filters and batch_size * num_units
+   *         int32_t elements), otherwise one size (batch_size * num_filters float elements)
+   * @throws std::runtime_error for the hybrid case (FLOAT32 input with S8 or U8 weights)
+   */
+  std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+  {
+    const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+    const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+    if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+        (weight_feature_input->dtype() == loco::DataType::S8 or
+         weight_feature_input->dtype() == loco::DataType::U8))
+    {
+      // The message names this helper's own platform (it used to say "linux")
+      throw std::runtime_error("Hybrid type is not currently supported for cmsisnn platform");
+    }
+
+    std::vector<uint32_t> scratchpad_sizes;
+
+    const auto batch_size = svdf_input->dim(0).value();
+    const auto num_filters = weight_feature_input->dim(0).value();
+    const auto rank = svdf->svdf_rank();
+    const auto num_units = num_filters / rank;
+
+    if (svdf_input->dtype() == loco::DataType::S8)
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+      scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+    }
+    else
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+    }
+
+    return scratchpad_sizes;
+  }
+
+private:
+  bool _use_dsp;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h
new file mode 100644
index 000000000..811aa67c3
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
+
+#include "IScratchpadHelper.h"
+#include <loco/IR/DataTypeTraits.h>
+
+namespace circle_planner
+{
+
+/**
+ * @brief Scratchpad helper for the linux platform
+ */
+class ScratchpadHelperLinux : public IScratchpadHelper
+{
+public:
+  /// AveragePool2D gets no scratchpad on linux.
+  uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *) final
+  {
+    return 0;
+  }
+
+  /// @return two sizes, for x and then y, each covering the whole operand tensor
+  std::vector<uint32_t>
+  ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+  {
+    // Product of all dimensions of a tensor multiplied by the size of its element type
+    const auto whole_tensor_size = [](luci::CircleNode *tensor) -> uint32_t {
+      uint32_t num_elements = 1;
+      for (int32_t axis = 0; axis < tensor->rank(); ++axis)
+        num_elements *= tensor->dim(axis).value();
+      return num_elements * loco::size(tensor->dtype());
+    };
+
+    const auto x = loco::must_cast<luci::CircleNode *>(batch_mat_mul->x());
+    const auto y = loco::must_cast<luci::CircleNode *>(batch_mat_mul->y());
+
+    std::vector<uint32_t> sizes;
+    sizes.push_back(whole_tensor_size(x));
+    sizes.push_back(whole_tensor_size(y));
+    return sizes;
+  }
+
+  /**
+   * @return 0 for S16 input and for non-dilated 1x1 stride-1 convolutions; otherwise
+   *         batches * output_height * output_width * input_depth * filter_height *
+   *         filter_width elements of the input data type (the im2col buffer)
+   */
+  uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final
+  {
+    const auto input = loco::must_cast<luci::CircleNode *>(conv->input());
+    const auto kernel = loco::must_cast<luci::CircleNode *>(conv->filter());
+
+    const uint32_t stride_h = conv->stride()->h();
+    const uint32_t stride_w = conv->stride()->w();
+
+    const uint32_t dilation_h = conv->dilation()->h();
+    const uint32_t dilation_w = conv->dilation()->w();
+
+    const uint32_t kernel_h = kernel->dim(1).value();
+    const uint32_t kernel_w = kernel->dim(2).value();
+
+    // im2col is not requested for S16 input
+    if (input->dtype() == loco::DataType::S16)
+      return 0;
+
+    // im2col is not requested either when nothing is dilated, strided or wider than 1x1
+    const bool is_dilated = dilation_h != 1 || dilation_w != 1;
+    const bool is_unit_stride_1x1 =
+      stride_h == 1 && stride_w == 1 && kernel_h == 1 && kernel_w == 1;
+    if (!is_dilated && is_unit_stride_1x1)
+      return 0;
+
+    const uint32_t depth = input->dim(3).value();
+    const uint32_t batches = input->dim(0).value();
+
+    const uint32_t out_h = conv->dim(1).value();
+    const uint32_t out_w = conv->dim(2).value();
+
+    return batches * out_h * out_w * depth * kernel_h * kernel_w * loco::size(input->dtype());
+  }
+
+  /// DepthwiseConv2D gets no scratchpad on linux.
+  uint32_t ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *) final
+  {
+    return 0;
+  }
+
+  /**
+   * @return for S8 input two sizes (batch_size * num_filters and batch_size * num_units
+   *         int32_t elements), otherwise one size (batch_size * num_filters float elements)
+   * @throws std::runtime_error for the hybrid case (FLOAT32 input with S8 or U8 weights)
+   */
+  std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+  {
+    const auto input = loco::must_cast<luci::CircleNode *>(svdf->input());
+    const auto weight_feature = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+    const bool is_float_input = input->dtype() == loco::DataType::FLOAT32;
+    if (is_float_input)
+    {
+      const auto weight_dtype = weight_feature->dtype();
+      if (weight_dtype == loco::DataType::S8 or weight_dtype == loco::DataType::U8)
+        throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+    }
+
+    const auto batches = input->dim(0).value();
+    const auto filters = weight_feature->dim(0).value();
+    const auto units = filters / svdf->svdf_rank();
+
+    std::vector<uint32_t> sizes;
+
+    if (input->dtype() != loco::DataType::S8)
+    {
+      sizes.push_back(batches * filters * sizeof(float));
+      return sizes;
+    }
+
+    sizes.push_back(batches * filters * sizeof(int32_t));
+    sizes.push_back(batches * units * sizeof(int32_t));
+    return sizes;
+  }
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h
new file mode 100644
index 000000000..14b41640c
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
+
+#include "IScratchpadHelper.h"
+
+namespace circle_planner
+{
+
+/**
+ * @brief Scratchpad helper for the mcu platform
+ *
+ * Only SVDF gets scratchpads here; AveragePool2D, Conv2D and DepthwiseConv2D report 0 and
+ * BatchMatMul is rejected.
+ */
+class ScratchpadHelperMCU : public IScratchpadHelper
+{
+public:
+  uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *) final
+  {
+    // for mcu AveragePool2d scratchpad tensors size = 0
+    return 0;
+  }
+
+  /// @throws std::runtime_error always: BatchMatMul is not supported for this platform
+  std::vector<uint32_t> ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *) final
+  {
+    throw std::runtime_error("BatchMatMul is not currently supported for mcu platform");
+  }
+
+  uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *) final
+  {
+    // for mcu scratchpad size = 0
+    return 0;
+  }
+
+  uint32_t ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *) final
+  {
+    // for mcu DepthwiseConv2d scratchpad tensors size = 0
+    return 0;
+  }
+
+  /**
+   * @return for S8 input two sizes (batch_size * num_filters and batch_size * num_units
+   *         int32_t elements), otherwise one size (batch_size * num_filters float elements)
+   * @throws std::runtime_error for the hybrid case (FLOAT32 input with S8 or U8 weights)
+   */
+  std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+  {
+    const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+    const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+    if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+        (weight_feature_input->dtype() == loco::DataType::S8 or
+         weight_feature_input->dtype() == loco::DataType::U8))
+    {
+      // The message names this helper's own platform (it used to say "linux")
+      throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+    }
+
+    std::vector<uint32_t> scratchpad_sizes;
+
+    const auto batch_size = svdf_input->dim(0).value();
+    const auto num_filters = weight_feature_input->dim(0).value();
+    const auto rank = svdf->svdf_rank();
+    const auto num_units = num_filters / rank;
+
+    if (svdf_input->dtype() == loco::DataType::S8)
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+      scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+    }
+    else
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+    }
+
+    return scratchpad_sizes;
+  }
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
diff --git a/compiler/circle-execution-plan/pal/TargetPlatform.h b/compiler/circle-execution-plan/pal/TargetPlatform.h
new file mode 100644
index 000000000..538a502fe
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/TargetPlatform.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
+#define CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
+
+namespace circle_planner
+{
+
+// Platforms an execution plan can be made for
+enum SupportedPlatformType
+{
+  LINUX = 0,
+  MCU = 1,
+  CMSISNN = 2
+};
+
+// Description of the target an execution plan is made for.
+struct TargetPlatform
+{
+ // Which of the supported platforms the plan targets.
+ SupportedPlatformType platform_type;
+ // Whether to plan with DSP. The command line tool in this change accepts it only together
+ // with the cmsisnn platform.
+ bool use_dsp;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
index a54100b8c..1788124c3 100644
--- a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
+++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
@@ -35,6 +35,18 @@ int entry(int argc, char **argv)
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument("--platform")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("linux")
+ .help("Platform name: linux mcu cmsisnn");
+ arser.add_argument("--use_dsp")
+ .nargs(1)
+ .type(arser::DataType::BOOL)
+ .required(false)
+ .default_value(false)
+ .help("Plan with or without dsp (now can be used only with cmsisnn)");
try
{
@@ -47,8 +59,35 @@ int entry(int argc, char **argv)
return 255;
}
- std::string input_path = arser.get<std::string>("input");
- std::string output_path = arser.get<std::string>("output");
+ const std::string input_path = arser.get<std::string>("input");
+ const std::string output_path = arser.get<std::string>("output");
+ const std::string platform_name = arser.get<std::string>("--platform");
+ const bool use_dsp = arser.get<bool>("--use_dsp");
+
+ if (platform_name != "cmsisnn" && use_dsp)
+ {
+ std::cerr << "ERROR: Now use_dsp can be used only with cmsisnn" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ circle_planner::SupportedPlatformType platform_type;
+ if (platform_name == "linux")
+ {
+ platform_type = circle_planner::SupportedPlatformType::LINUX;
+ }
+ else if (platform_name == "mcu")
+ {
+ platform_type = circle_planner::SupportedPlatformType::MCU;
+ }
+ else if (platform_name == "cmsisnn")
+ {
+ platform_type = circle_planner::SupportedPlatformType::CMSISNN;
+ }
+ else
+ {
+ std::cerr << "ERROR: Invalid platform name '" << platform_name << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
foder::FileLoader file_loader{input_path};
std::vector<char> model_data;
@@ -82,8 +121,8 @@ int entry(int argc, char **argv)
auto module = importer.importModule(circle_model);
// Do main job
- luci::ExecutionPlanner execution_planner(module->graph());
- execution_planner.get_execution_plan();
+ circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp});
+ execution_planner.make_execution_plan();
// Export to output Circle file
luci::CircleExporter exporter;
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
index c37d1e5f5..ec2ec1362 100644
--- a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
@@ -18,72 +18,49 @@
#include <loco/IR/Algorithm.h>
#include <luci/UserSettings.h>
-namespace luci
+namespace circle_planner
{
namespace
{
-constexpr uint32_t nodeNotAssigned = std::numeric_limits<int32_t>::max();
+constexpr uint32_t node_not_assigned = std::numeric_limits<int32_t>::max();
-uint32_t compute_output_size(Padding padding, uint32_t image_size, uint32_t filter_size,
- uint32_t stride, uint32_t dilation_rate = 1)
+bool isExecutableNode(const luci::CircleNode *node)
{
- const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
- switch (padding)
+ switch (node->opcode())
{
- case Padding::SAME:
- return (image_size + stride - 1) / stride;
- case Padding::VALID:
- return (image_size + stride - effective_filter_size) / stride;
+ // The following nodes denote outputs of multiple-output nodes.
+ // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
+ return false;
default:
- assert(false);
+ return true;
}
}
-// Method finds (if necessary) size for im2col temporary tensor.
-uint32_t compute_im2col_size(const luci::CircleConv2D *conv)
+bool isTensorProducingNode(const luci::CircleNode *node)
{
- auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
- auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
- auto padding = (conv->padding());
- uint32_t stride_height = conv->stride()->h();
- uint32_t stride_width = conv->stride()->w();
-
- uint32_t dilation_height_factor = conv->dilation()->h();
- uint32_t dilation_width_factor = conv->dilation()->w();
-
- uint32_t filter_height = filter->dim(1).value();
- uint32_t filter_width = filter->dim(2).value();
-
- const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1;
- const bool need_non_dilated_im2col =
- stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1;
- bool need_im2col =
- conv_input->dtype() != loco::DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
-
- if (!need_im2col)
+ switch (node->opcode())
{
- return 0;
+ // The following nodes are multiple-output nodes. They do not produce tensors, the tensors
+ // are produced by the corresponding *Out nodes instead.
+ // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::UNPACK:
+ return false;
+ default:
+ return true;
}
-
- uint32_t input_depth = conv_input->dim(3).value();
- uint32_t input_height = conv_input->dim(1).value();
- uint32_t input_width = conv_input->dim(2).value();
-
- uint32_t output_height = compute_output_size(padding, input_height, filter_height, stride_height,
- dilation_height_factor);
- uint32_t output_width =
- compute_output_size(padding, input_width, filter_width, stride_width, dilation_width_factor);
-
- uint32_t batches = conv_input->dim(0).value();
-
- return batches * output_height * output_width * input_depth * filter_height * filter_width *
- size(conv_input->dtype());
}
} // namespace
-void ExecutionPlanner::get_execution_plan()
+void ExecutionPlanner::make_execution_plan()
{
get_default_execution_order_plan();
_required_size = get_offsets_with_greedy_by_size();
@@ -106,23 +83,23 @@ void ExecutionPlanner::get_default_execution_order_plan()
void ExecutionPlanner::get_usage_interval()
{
// Initialize vectors of first and last nodes for usage interval
- _alloc_node.assign(_ordered_nodes.size(), nodeNotAssigned);
- _dealloc_node.assign(_ordered_nodes.size(), nodeNotAssigned);
+ _alloc_node.assign(_ordered_nodes.size(), node_not_assigned);
+ _dealloc_node.assign(_ordered_nodes.size(), node_not_assigned);
// Vector for count usages
std::vector<int> usages_counts(_ordered_nodes.size(), 0);
auto allocate = [this](uint32_t node, uint32_t tensor) {
- if (_alloc_node[tensor] != nodeNotAssigned)
+ if (_alloc_node[tensor] != node_not_assigned)
{
return;
}
- assert(_dealloc_node[tensor] == nodeNotAssigned);
+ assert(_dealloc_node[tensor] == node_not_assigned);
_alloc_node[tensor] = node;
};
auto deallocate = [this](uint32_t node, uint32_t tensor) {
- assert(_dealloc_node[tensor] == nodeNotAssigned);
+ assert(_dealloc_node[tensor] == node_not_assigned);
_dealloc_node[tensor] = node;
};
@@ -158,13 +135,24 @@ void ExecutionPlanner::get_usage_interval()
for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
{
const auto node = _ordered_nodes.at(i);
+ auto prev_nodes = preds(node);
if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
{
allocate(0, i);
}
- allocate(i, i);
+ else if (!isExecutableNode(loco::must_cast<luci::CircleNode *>(node)))
+ {
+ // If the current node is an output of a multiple-output node, then its lifetime should begin
+ // when the lifetime of the previous node begins
+ auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), *prev_nodes.begin());
+ size_t index = std::distance(_ordered_nodes.begin(), it);
+ allocate(index, i);
+ }
+ else
+ {
+ allocate(i, i);
+ }
- auto prev_nodes = preds(node);
for (auto &prev_node : prev_nodes)
{
auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), prev_node);
@@ -203,7 +191,7 @@ uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size()
uint32_t ExecutionPlanner::greedy_by_size_approach()
{
size_t result_size = 0;
- create_alloc_node_inform_vector(false, false, false);
+ create_alloc_node_inform_vector(_is_null_consts, _is_null_inputs, _is_null_scratchpads);
std::vector<AllocationNodeInformation> ordered_alloc_inform;
for (auto &current_node : _alloc_node_inform_vector)
{
@@ -250,22 +238,22 @@ uint32_t ExecutionPlanner::greedy_by_size_approach()
}
void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs,
- bool null_im2col)
+ bool null_scratchpad)
{
auto node_compare = [this](const AllocationNodeInformation &alloc_1,
const AllocationNodeInformation &alloc_2) {
auto idx1 = alloc_1.node_num;
auto idx2 = alloc_2.node_num;
- if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == nodeNotAssigned)
+ if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == node_not_assigned)
{
- if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned)
+ if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned)
{
return idx1 < idx2;
}
return true;
}
- if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned)
+ if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned)
{
return false;
}
@@ -305,30 +293,66 @@ void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool nu
{
_alloc_node_inform_vector[i].size = 0;
}
+ else if (!isTensorProducingNode(circle_node))
+ {
+ _alloc_node_inform_vector[i].size = 0;
+ }
else
{
_alloc_node_inform_vector[i].size = node_size;
}
- // Im2col
- auto opcode = circle_node->opcode();
- if (opcode == luci::CircleOpcode::CONV_2D)
+ // Scratchpad, if needed
+ std::vector<uint32_t> scratchpad_sizes;
+ if (!null_scratchpad)
{
- auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
- auto im2col_size = compute_im2col_size(conv);
- if (im2col_size > 0)
+ switch (circle_node->opcode())
{
- AllocationNodeInformation temp_alloc;
-
- if (null_im2col)
+ case luci::CircleOpcode::AVERAGE_POOL_2D:
{
- temp_alloc.size = 0;
+ const auto avg_pool = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
+ scratchpad_sizes.push_back(
+ _scratchpad_helper->ComputeScratchpadSizeAveragePool2d(avg_pool));
+ break;
}
- else
+ case luci::CircleOpcode::BATCH_MATMUL:
{
- temp_alloc.size = im2col_size;
+ const auto batch_mat_mul = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
+ scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeBatchMatMul(batch_mat_mul);
+ break;
}
+ case luci::CircleOpcode::CONV_2D:
+ {
+ const auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+ scratchpad_sizes.push_back(_scratchpad_helper->ComputeScratchpadSizeConv2d(conv));
+ break;
+ }
+ case luci::CircleOpcode::DEPTHWISE_CONV_2D:
+ {
+ const auto depthwise_conv =
+ loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+ scratchpad_sizes.push_back(
+ _scratchpad_helper->ComputeScratchpadSizeDepthwiseConv2d(depthwise_conv));
+ break;
+ }
+ case luci::CircleOpcode::SVDF:
+ {
+ const auto svdf = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeSVDF(svdf);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ for (const auto scratchpad_size : scratchpad_sizes)
+ {
+ if (scratchpad_size > 0)
+ {
+ AllocationNodeInformation temp_alloc;
+ temp_alloc.size = scratchpad_size;
temp_alloc.first_node = i - 1;
temp_alloc.last_node = i + 1;
temp_alloc.node_num = i;
@@ -352,7 +376,7 @@ void ExecutionPlanner::dump_inform()
{
auto current_node_it = std::find_if(
_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
- [this, i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; });
+ [i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; });
for (uint32_t j = 0; j < _ordered_nodes.size(); j++)
{
auto first_node = _alloc_node[j];
@@ -360,7 +384,7 @@ void ExecutionPlanner::dump_inform()
auto it = std::find_if(
_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
- [this, j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; });
+ [j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; });
if (i >= first_node && i <= last_node)
{
current_node_it->breadth += it->size;
@@ -386,4 +410,4 @@ void ExecutionPlanner::dump_inform()
});
}
-} // namespace luci
+} // namespace circle_planner
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h
index 8e3d9b46a..e0833c407 100644
--- a/compiler/circle-execution-plan/src/ExecutionPlanner.h
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h
@@ -17,10 +17,15 @@
#ifndef CIRCLE_EXECUTION_PLANNER_H
#define CIRCLE_EXECUTION_PLANNER_H
+#include "TargetPlatform.h"
+#include "IScratchpadHelper.h"
+#include "ScratchpadHelperLinux.h"
+#include "ScratchpadHelperMCU.h"
+#include "ScratchpadHelperCMSISNN.h"
#include <luci/IR/Module.h>
#include <luci/Plan/CircleNodeExecutionPlan.h>
-namespace luci
+namespace circle_planner
{
// struct for additional information for the node. it helps build allocations plan for nodes.
struct AllocationNodeInformation
@@ -50,7 +55,7 @@ struct AllocationNodeInformation
uint32_t last_node;
// is the current node temporary or not
bool is_temp;
- // operation breadth of current node
+ // Breadth is the sum of the sizes of live tensors at the moment of execution of the given node
uint32_t breadth;
bool operator<(const AllocationNodeInformation &other) const { return offset < other.offset; }
@@ -60,12 +65,44 @@ class ExecutionPlanner
{
public:
ExecutionPlanner() = delete;
- explicit ExecutionPlanner(loco::Graph *graph) { _graph = graph; };
+ explicit ExecutionPlanner(loco::Graph *graph) : _graph(graph)
+ {
+ _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
+ }
+
+ explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform) : _graph(graph)
+ {
+ switch (target_platform.platform_type)
+ {
+ case LINUX:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
+ break;
+ case MCU:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperMCU>();
+ break;
+ case CMSISNN:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperCMSISNN>(target_platform.use_dsp);
+ break;
+ default:
+ assert(false && "Use unsupported platform");
+ }
+ };
// Method provides execution plan, which contains execution order and
// memory offsets for all nodes in _graph.
// This plan writes in nodes annotation information with help of CircleNodeExecutionPlan class.
- void get_execution_plan();
+ void make_execution_plan();
+
+ // Method changes the planning mode:
+ // is_null_consts = true - constants are no longer taken into account when planning
+ // is_null_inputs = true - inputs are no longer taken into account when planning
+ // is_null_scratchpads = true - scratchpads are no longer taken into account when planning
+ void change_planning_mode(bool is_null_consts, bool is_null_inputs, bool is_null_scratchpads)
+ {
+ _is_null_consts = is_null_consts;
+ _is_null_inputs = is_null_inputs;
+ _is_null_scratchpads = is_null_scratchpads;
+ };
private:
// Method gets default execution order plan and saves it in _ordered_nodes vector.
@@ -83,18 +120,19 @@ private:
// Return: required size of buffer.
uint32_t get_offsets_with_greedy_by_size();
- // Realization of greedy by size approach to find offsets for nodes.
+ // Realization of greedy by size approach (algorithm is mentioned in
+ // "EFFICIENT MEMORY MANAGEMENT FOR DEEP NEURAL NET INFERENCE" paper) to find offsets for nodes.
uint32_t greedy_by_size_approach();
// Method creates and fills _alloc_node_inform_vector with usage interval inform and node's sizes.
// null_consts = true - size of const nodes will be equal 0;
// null_inputs = true - size of input nodes will be equal 0;
- // null_im2col = true - size of im2col nodes will be equal 0;
- // It using if we don't want to take input(const or im2col) nodes into account
+ // null_scratchpad = true - size of scratchpad nodes will be equal 0;
+ // It is used if we don't want to take input (const or scratchpad) nodes into account
// when determining offsets and calculating the required buffer size. This is uses for
// experiments.
void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false,
- bool null_im2col = false);
+ bool null_scratchpad = false);
// Stores allocation additional information for the all nodes from _graph.
std::vector<AllocationNodeInformation> _alloc_node_inform_vector;
@@ -121,10 +159,21 @@ private:
loco::Graph *_graph;
+ // Calculate size of scratchpad tensors for current platform
+ std::unique_ptr<IScratchpadHelper> _scratchpad_helper;
+
// Required memory size.
uint32_t _required_size = 0;
+
+ // Flags for choosing different planning modes:
+ // _is_null_consts = true - constants are no longer taken into account when planning
+ // _is_null_inputs = true - inputs are no longer taken into account when planning
+ // _is_null_scratchpads = true - scratchpads are no longer taken into account when planning
+ bool _is_null_consts = false;
+ bool _is_null_inputs = false;
+ bool _is_null_scratchpads = false;
};
-} // namespace luci
+} // namespace circle_planner
#endif // CIRCLE_EXECUTION_PLANNER_H