Diffstat (limited to 'runtimes/neurun/src/backend/cpu')
18 files changed, 689 insertions, 653 deletions
diff --git a/runtimes/neurun/src/backend/cpu/CMakeLists.txt b/runtimes/neurun/src/backend/cpu/CMakeLists.txt index 95e9af687..dc4406a65 100644 --- a/runtimes/neurun/src/backend/cpu/CMakeLists.txt +++ b/runtimes/neurun/src/backend/cpu/CMakeLists.txt @@ -1,19 +1,18 @@ file(GLOB_RECURSE SOURCES "*.cc") -add_library(${LIB_NEURUN_BACKEND_CPU} STATIC ${SOURCES}) +add_library(${LIB_NEURUN_BACKEND_CPU} SHARED ${SOURCES}) target_include_directories(${LIB_NEURUN_BACKEND_CPU} PUBLIC ${NNFW_INCLUDE_DIR}) target_include_directories(${LIB_NEURUN_BACKEND_CPU} PUBLIC ${NEURUN_INCLUDE_DIR}) target_include_directories(${LIB_NEURUN_BACKEND_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) -target_link_libraries(${LIB_NEURUN_BACKEND_CPU} arm_compute) # TODO We should not need this target_link_libraries(${LIB_NEURUN_BACKEND_CPU} tensorflow-lite) -target_link_libraries(${LIB_NEURUN_BACKEND_CPU} nnfw_util) -target_link_libraries(${LIB_NEURUN_BACKEND_CPU} nnfw_support_nnapi) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} nnfw_lib_misc) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} nnfw_lib_cpp14) target_link_libraries(${LIB_NEURUN_BACKEND_CPU} ${LIB_NEURUN_KERNEL_CPU}) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} ${LIB_NEURUN}) target_compile_options(${LIB_NEURUN_BACKEND_CPU} PRIVATE -Wall -Wextra -Werror) -set_target_properties(${LIB_NEURUN_BACKEND_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${LIB_NEURUN_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu) install(TARGETS ${LIB_NEURUN_BACKEND_CPU} DESTINATION lib/neurun) diff --git a/runtimes/neurun/src/backend/cpu/BackendConfig.cc b/runtimes/neurun/src/backend/cpu/Config.cc index 34fc3491a..001ba9d02 100644 --- a/runtimes/neurun/src/backend/cpu/BackendConfig.cc +++ b/runtimes/neurun/src/backend/cpu/Config.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "backend/cpu/BackendConfig.h" +#include "backend/cpu/Config.h" namespace neurun { @@ -23,7 +23,7 @@ namespace backend namespace cpu { -void BackendConfig::initialize() +void Config::initialize() { // DO NOTHING } diff --git a/runtimes/neurun/src/backend/cpu/BackendConfig.h b/runtimes/neurun/src/backend/cpu/Config.h index 109235bb1..ad9ca0ee8 100644 --- a/runtimes/neurun/src/backend/cpu/BackendConfig.h +++ b/runtimes/neurun/src/backend/cpu/Config.h @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#ifndef __NEURUN_BACKEND_CPU_BACKEND_CONFIG_H__ -#define __NEURUN_BACKEND_CPU_BACKEND_CONFIG_H__ +#ifndef __NEURUN_BACKEND_CPU_CONFIG_H__ +#define __NEURUN_BACKEND_CPU_CONFIG_H__ -#include "backend/IBackendConfig.h" +#include "backend/interface/IConfig.h" namespace neurun { @@ -26,20 +26,26 @@ namespace backend namespace cpu { -class BackendConfig : public IBackendConfig +class Config : public IConfig { public: - BackendConfig() + Config() { // DO NOTHING } + virtual std::string id() override { return "cpu"; } virtual void initialize() override; virtual graph::operand::Layout getOperandLayout() { return graph::operand::Layout::NHWC; } + virtual bool SupportSubTensorAlloc() override + { + // NOTE CPU allocator cannot support subtensor allocation yet + return false; + } }; } // namespace cpu } // namespace backend } // namespace neurun -#endif // __NEURUN_BACKEND_CPU_BACKEND_CONFIG_H__ +#endif // __NEURUN_BACKEND_CPU_CONFIG_H__ diff --git a/runtimes/neurun/src/backend/cpu/InitializerGenerator.cc b/runtimes/neurun/src/backend/cpu/InitializerGenerator.cc deleted file mode 100644 index 7b08c7131..000000000 --- a/runtimes/neurun/src/backend/cpu/InitializerGenerator.cc +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "InitializerGenerator.h" - -#include "internal/nnapi/kernel/Reader.h" -#include "internal/nnapi/kernel/View.h" -#include "util/kernel/IndexIterator.h" - -#include "NeuralNetworks.h" - -namespace neurun -{ -namespace backend -{ -namespace cpu -{ - -InitializerGenerator::InitializerGenerator(const neurun::graph::operand::Set &ctx) : _ctx(ctx) -{ - // DO NOTHING -} - -Initializer -InitializerGenerator::generateWeight(const graph::operation::Conv2D::Implicit::Node &node) -{ - const ::neurun::graph::operand::Index ker_index{node.getInputs().at(1)}; - - const auto ker_shape = _ctx.at(ker_index).shape().asKernel(); - auto ker_base = _ctx.at(ker_index).data().base(); - auto ker_size = _ctx.at(ker_index).data().size(); - - return [ker_shape, ker_base, ker_size](::arm_compute::ITensor &tensor) { - const ::internal::nnapi::kernel::Reader<float> from{ker_shape, ker_base, ker_size}; - ::internal::nnapi::kernel::View<float> into{&tensor}; - - ::nnfw::util::kernel::iterate(ker_shape) - << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(nth, ch, row, col); - into.at(nth, row, col, ch) = value; - }; - }; -} - -Initializer InitializerGenerator::generateWeight(const graph::operation::FullyConnected::Node &node) -{ - const ::neurun::graph::operand::Index weight_index{node.getInputs().at(1)}; - const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)}; - - const auto num_output = _ctx.at(weight_index).shape().dim(0); - auto weight_base = _ctx.at(weight_index).data().base(); - auto weight_size = _ctx.at(weight_index).data().size(); - auto weight_type = _ctx.at(weight_index).typeInfo().type(); - - // NOTE We assume that input is a feature map - // TODO Remove this restriction! - const auto ifm_shape = _ctx.at(input_index).shape().asFeature(); - - switch (weight_type) - { - case ::neurun::graph::operand::DataType::TENSOR_FLOAT32: - { - return [num_output, ifm_shape, weight_base, weight_size](::arm_compute::ITensor &tensor) { - const ::nnfw::util::kernel::Shape ker_shape{num_output, ifm_shape.C, ifm_shape.H, - ifm_shape.W}; - const ::internal::nnapi::kernel::Reader<float> from{ker_shape, weight_base, weight_size}; - - ::nnfw::util::kernel::iterate(ker_shape) - << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(nth, ch, row, col); - - uint32_t offset = 0; - - // NNAPI uses NHWC ordering - offset += nth * ifm_shape.H * ifm_shape.W * ifm_shape.C; - offset += row * ifm_shape.W * ifm_shape.C; - offset += col * ifm_shape.C; - offset += ch; - - const ::arm_compute::Coordinates coordinate{offset}; - - auto into = reinterpret_cast<float *>(tensor.ptr_to_element(coordinate)); - - *into = value; - }; - }; - } - case ::neurun::graph::operand::DataType::TENSOR_QUANT8_ASYMM: - { - return [num_output, ifm_shape, weight_base, weight_size](::arm_compute::ITensor &tensor) { - const ::nnfw::util::kernel::Shape ker_shape{num_output, ifm_shape.C, ifm_shape.H, - ifm_shape.W}; - const ::internal::nnapi::kernel::Reader<uint8_t> from{ker_shape, weight_base, weight_size}; - ::nnfw::util::kernel::iterate(ker_shape) - << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(nth, ch, row, col); - uint32_t offset = 0; - - // NNAPI uses NHWC ordering - offset += nth * ifm_shape.H * ifm_shape.W * ifm_shape.C; - offset += row * ifm_shape.W * ifm_shape.C; - offset += col * ifm_shape.C; - offset += ch; - - const ::arm_compute::Coordinates coordinate{offset}; - - auto into = reinterpret_cast<uint8_t 
*>(tensor.ptr_to_element(coordinate)); - - *into = value; - }; - }; - } - default: - { - throw std::runtime_error("Not supported weight type"); - } - } -} - -Initializer InitializerGenerator::generateBias(const graph::operation::Conv2D::Implicit::Node &node) -{ - // TODO Refactor so we can reuse the common code - - const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)}; - - auto bias_base = _ctx.at(bias_index).data().base(); - const auto bias_size = _ctx.at(bias_index).shape().asVector(); - - return [bias_base, bias_size](::arm_compute::ITensor &tensor) { - for (int32_t n = 0; n < bias_size; ++n) - { - const ::arm_compute::Coordinates coordinate{n}; - - float *into = reinterpret_cast<float *>(tensor.ptr_to_element(coordinate)); - - const float *from = reinterpret_cast<const float *>(bias_base) + n; - const auto value = *from; - - *into = value; - } - }; -} - -Initializer InitializerGenerator::generateBias(const graph::operation::FullyConnected::Node &node) -{ - const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)}; - - auto bias_base = _ctx.at(bias_index).data().base(); - auto bias_type = _ctx.at(bias_index).typeInfo().type(); - const auto bias_size = _ctx.at(bias_index).shape().asVector(); - - switch (bias_type) - { - case ::neurun::graph::operand::DataType::TENSOR_FLOAT32: - { - return [bias_base, bias_size](::arm_compute::ITensor &tensor) { - for (int32_t n = 0; n < bias_size; ++n) - { - const ::arm_compute::Coordinates coordinate{n}; - - float *into = reinterpret_cast<float *>(tensor.ptr_to_element(coordinate)); - - const float *from = reinterpret_cast<const float *>(bias_base) + n; - const auto value = *from; - - *into = value; - } - }; - } - case ::neurun::graph::operand::DataType::TENSOR_QUANT8_ASYMM: - { - return [bias_base, bias_size](::arm_compute::ITensor &tensor) { - for (int32_t n = 0; n < bias_size; ++n) - { - const ::arm_compute::Coordinates coordinate{n}; - - uint8_t *into = reinterpret_cast<uint8_t *>(tensor.ptr_to_element(coordinate)); - - const uint8_t *from = reinterpret_cast<const uint8_t *>(bias_base) + n; - const auto value = *from; - - *into = value; - } - }; - } - default: - { - throw std::runtime_error("Not supported bias type"); - } - } -} - -} // namespace cpu -} // namespace backend -} // namespace neurun diff --git a/runtimes/neurun/src/backend/cpu/InitializerGenerator.h b/runtimes/neurun/src/backend/cpu/InitializerGenerator.h deleted file mode 100644 index 42d37f48b..000000000 --- a/runtimes/neurun/src/backend/cpu/InitializerGenerator.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __NEURUN_BACKEND_CPU_INITIALIZER_GENERATOR_H__ -#define __NEURUN_BACKEND_CPU_INITIALIZER_GENERATOR_H__ - -#include "backend/IInitializerGenerator.h" - -#include "graph/operand/Set.h" - -namespace neurun -{ -namespace backend -{ -namespace cpu -{ - -class InitializerGenerator : public IInitializerGenerator -{ -public: - InitializerGenerator(const neurun::graph::operand::Set &ctx); - - Initializer generateWeight(const graph::operation::Conv2D::Implicit::Node &node) override; - Initializer generateWeight(const graph::operation::FullyConnected::Node &node) override; - - Initializer generateBias(const graph::operation::Conv2D::Implicit::Node &node) override; - Initializer generateBias(const graph::operation::FullyConnected::Node &node) override; - -private: - const neurun::graph::operand::Set &_ctx; -}; - -} // namespace cpu -} // namespace backend -} // namespace neurun - -#endif // __NEURUN_BACKEND_CPU_INITIALIZER_GENERATOR_H__ diff --git a/runtimes/neurun/src/backend/cpu/MemoryAllocator.cc b/runtimes/neurun/src/backend/cpu/MemoryAllocator.cc deleted file mode 100644 index 13d2a7ffc..000000000 --- a/runtimes/neurun/src/backend/cpu/MemoryAllocator.cc +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -//#include "internal/cpu/MemoryAllocator.h" diff --git a/runtimes/neurun/src/backend/cpu/MemoryAllocator.h b/runtimes/neurun/src/backend/cpu/MemoryAllocator.h deleted file mode 100644 index e3550ac07..000000000 --- a/runtimes/neurun/src/backend/cpu/MemoryAllocator.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __INTERNAL_CPU_MEMORY_ALLOCATOR_H__ -#define __INTERNAL_CPU_MEMORY_ALLOCATOR_H__ - -#include "arm_compute/runtime/ITensorAllocator.h" -#include "arm_compute/runtime/Memory.h" - -#include <cstdint> -#include <memory> -#include <vector> - -namespace arm_compute -{ -class Coordinates; -class TensorInfo; -class Tensor; -}; - -/** Basic implementation of a CPU memory tensor allocator. */ -class TensorAllocator : public ITensorAllocator -{ -public: - /** Default constructor. 
*/ - TensorAllocator(Tensor *owner = nullptr); - /** Default destructor */ - ~TensorAllocator(); - - /** Make ITensorAllocator's init methods available */ - using ITensorAllocator::init; - - /** Shares the same backing memory with another tensor allocator, while the tensor info might be - * different. - * In other words this can be used to create a sub-tensor from another tensor while sharing the - * same memory. - * - * @note TensorAllocator have to be of the same specialized type. - * - * @param[in] allocator The allocator that owns the backing memory to be shared. Ownership becomes - * shared afterwards. - * @param[in] coords The starting coordinates of the new tensor inside the parent tensor. - * @param[in] sub_info The new tensor information (e.g. shape etc) - */ - void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo sub_info); - - /** Returns the pointer to the allocated data. */ - uint8_t *data() const; - - /** Allocate size specified by TensorInfo of CPU memory. - * - * @note The tensor must not already be allocated when calling this function. - * - */ - void allocate() override; - - /** Free allocated CPU memory. - * - * @note The tensor must have been allocated when calling this function. - * - */ - void free() override; - /** Import an existing memory as a tensor's backing memory - * - * @warning If the tensor is flagged to be managed by a memory manager, - * this call will lead to an error. - * @warning Ownership of memory depends on the way the @ref Memory object was constructed - * @note Calling free on a tensor with imported memory will just clear - * the internal pointer value. - * - * @param[in] memory Memory to import - * - * @return error status - */ - arm_compute::Status import_memory(Memory memory); - /** Associates the tensor with a memory group - * - * @param[in] associated_memory_group Memory group to associate the tensor with - */ - void set_associated_memory_group(MemoryGroup *associated_memory_group); - -protected: - /** No-op for CPU memory - * - * @return A pointer to the beginning of the tensor's allocation. - */ - uint8_t *lock() override; - - /** No-op for CPU memory. */ - void unlock() override; - -private: - MemoryGroup *_associated_memory_group; /**< Registered memory manager */ - Memory _memory; /**< CPU memory */ - Tensor *_owner; /**< Owner of the allocator */ -}; - -namespace internal -{ -namespace cpu -{ - -class MemoryAllocator : public -{ -}; - -} // namespace cpu -} // namespace internal - -#endif // __INTERNAL_CPU_MEMORY_ALLOCATOR_H__ diff --git a/runtimes/neurun/src/backend/cpu/MemoryPlanner.cc b/runtimes/neurun/src/backend/cpu/MemoryPlanner.cc new file mode 100644 index 000000000..2d0995b8a --- /dev/null +++ b/runtimes/neurun/src/backend/cpu/MemoryPlanner.cc @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "MemoryPlanner.h" +#include "util/logging.h" +#include <cassert> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +Allocator::Allocator(uint32_t capacity) +{ + assert(!_base && capacity != 0); + + _base = new uint8_t[capacity]; + + VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl; + VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base) << std::endl; +} + +Allocator::~Allocator() { delete[] _base; } + +void BumpPlanner::claim(const model::operand::Index &ind, size_t size) +{ + assert(size != 0); + + Block blk{_capacity, size}; + _mem_plans[ind] = blk; + _capacity += size; + + VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size + << std::endl; +} + +void BumpPlanner::release(const model::operand::Index &ind) +{ + VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): " + << "NOTHING does" << std::endl; +} + +// There are some assumptions for claiming memory(== making a reservation for memory). +// 1. About _claim_table(std::map). +// - The table's data structure is std::map so that it always sorts +// value(model::operand::Index) by key(base_offset). +// - This claim() inserts key/value into _claim_table and the release() removes the key/value from +// _claim_table. +// - _claim_table shows the memory status at a certain point in time. Therefore, +// - If _claim_table has an offset and a certain size at a certain point in time, +// it means the place at the offset has been already claimed(== can't claim now. need to find +// someplace new). +// - If _claim_table doesn't have any element for an offset and a certain size at a certain +// point in time, it means the place at the offset can be claimed. +// 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than +// the previous claim_base_offset. +void FirstFitPlanner::claim(const model::operand::Index &ind, size_t size) +{ + assert(size != 0); + + // Find the right position for claiming + uint32_t next_offset = 0; + for (auto &mem_claim : _claim_table) + { + auto claimed_base_offset = mem_claim.first; + auto claimed_size = _mem_plans[mem_claim.second].size; + if (next_offset + size <= claimed_base_offset) + { + break; + } + else + { + next_offset = claimed_base_offset + claimed_size; + } + } + + // Now next_offset is set to the proper offset + _claim_table[next_offset] = ind; + _mem_plans[ind] = {next_offset, size}; + + VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]" + << std::endl; + + if (_capacity < next_offset + size) + { + _capacity = next_offset + size; + } +} + +void FirstFitPlanner::release(const model::operand::Index &ind) +{ + for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it) + { + if (it->second == ind) + { + uint32_t offset = it->first; + uint32_t index = ind.value(); + uint32_t size = _mem_plans[ind].size; + + _claim_table.erase(it); + + VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]" + << std::endl; + return; + } + } + assert(!"Cannot release for given index. It has been not claimed or released already."); +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtimes/neurun/src/backend/cpu/MemoryPlanner.h b/runtimes/neurun/src/backend/cpu/MemoryPlanner.h new file mode 100644 index 000000000..4b2661223 --- /dev/null +++ b/runtimes/neurun/src/backend/cpu/MemoryPlanner.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file MemoryPlanner.h + * @brief This file contains Memory Planning related classes + */ + +#ifndef __NEURUN_BACKEND_CPU_MEMORY_PLANNER_H__ +#define __NEURUN_BACKEND_CPU_MEMORY_PLANNER_H__ + +#include <map> +#include <unordered_map> + +#include "model/operand/Index.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +/** + * @brief Structure to have memory offset and size + */ +struct Block +{ + uint32_t offset; + uint32_t size; +}; + +/** + * @brief Class to allocate memory + */ +class Allocator +{ +public: + Allocator(uint32_t capacity); + ~Allocator(); + /** + * @brief Get memory base pointer + * @return base pointer + */ + uint8_t *base() const { return _base; } + +private: + uint8_t *_base = nullptr; +}; + +/** + * @brief Interface to plan memory + */ +struct IMemoryPlanner +{ + using MemoryPlans = std::unordered_map<model::operand::Index, Block>; + + /** + * @brief Claim memory for operand + * @param[in] index The operand index + * @param[in] size The size of the memory + */ + virtual void claim(const model::operand::Index &, size_t) = 0; + /** + * @brief Release memory for operand + * @param[in] index The operand index + */ + virtual void release(const model::operand::Index &) = 0; + /** + * @brief Get capacity for memory planning + * @return The value of capacity + */ + virtual uint32_t capacity() = 0; + /** + * @brief Get MemoryPlans + * @return MemoryPlans + */ + virtual MemoryPlans &memory_plans() = 0; +}; + +/** + * @brief Class to plan memory by bump way + */ +class BumpPlanner : public IMemoryPlanner +{ +public: + /** + * @brief Claim memory for operand by bump way + * @param[in] index The operand index + * @param[in] size The size of the memory + */ + virtual void claim(const model::operand::Index &, size_t) override; + /** + * @brief Release memory for operand by bump way + * @param[in] index The operand index + */ + virtual void release(const model::operand::Index &) override; + /** + * @brief Get capacity for memory planning + * @return The value of capacity + */ + virtual uint32_t capacity() override { return _capacity; } + /** + * @brief Get MemoryPlans + * @return MemoryPlans + */ + virtual MemoryPlans &memory_plans() override { return _mem_plans; } + +private: + uint32_t _capacity = 0; + MemoryPlans _mem_plans; +}; + +/** + * @brief Class to plan memory by firstfit way + */ +class FirstFitPlanner : public IMemoryPlanner +{ +public: + /** + * @brief Claim memory for operand by firstfit way + * @param[in] index The operand index + * @param[in] size The size of the memory + */ + virtual void claim(const model::operand::Index &, size_t) override; + /** + * @brief Release memory for operand by firstfit way + * @param[in] index The operand index + */ + virtual void release(const model::operand::Index &) override; + /** + * @brief Get capacity for memory planning + * @return The value of capacity + */ + virtual uint32_t capacity() override { return _capacity; } + /** + 
* @brief Get MemoryPlans + * @return MemoryPlans + */ + virtual MemoryPlans &memory_plans() override { return _mem_plans; } + +private: + uint32_t _capacity = 0; + MemoryPlans _mem_plans; + // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset) + std::map<uint32_t, model::operand::Index> _claim_table; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_MEMORY_PLANNER_H__ diff --git a/runtimes/neurun/src/backend/cpu/PluginClassesAllocator.cc b/runtimes/neurun/src/backend/cpu/PluginClassesAllocator.cc new file mode 100644 index 000000000..26d4d8858 --- /dev/null +++ b/runtimes/neurun/src/backend/cpu/PluginClassesAllocator.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include "TensorBuilder.h" +#include "StageGenerator.h" +#include "Config.h" +#include "util/logging.h" + +extern "C" { +neurun::backend::cpu::TensorBuilder *allocate_TensorBuilder() +{ + VERBOSE(allocate_TensorBuilder) << "loaded from CPU\n"; + return new neurun::backend::cpu::TensorBuilder; +} + +neurun::backend::cpu::StageGenerator * +allocate_StageGenerator(const neurun::model::operand::Set &operand_ctx, + const std::shared_ptr<neurun::backend::cpu::TensorBuilder> &tensor_builder) +{ + VERBOSE(allocate_StageGenerator) << "loaded from CPU\n"; + return new neurun::backend::cpu::StageGenerator(operand_ctx, tensor_builder); +} + +neurun::backend::cpu::Config *allocate_Config() +{ + VERBOSE(allocate_Config) << "loaded from CPU\n"; + return new neurun::backend::cpu::Config; +} +} diff --git a/runtimes/neurun/src/backend/cpu/StageGenerator.cc b/runtimes/neurun/src/backend/cpu/StageGenerator.cc index b7a3fa24a..c53b320a4 100644 --- a/runtimes/neurun/src/backend/cpu/StageGenerator.cc +++ b/runtimes/neurun/src/backend/cpu/StageGenerator.cc @@ -18,7 +18,8 @@ #include <stdexcept> -#include "internal/Padding.h" +#include "cpp14/memory.h" +#include "util/Padding.h" #include "kernel/cpu/OperationUtils.h" #include "kernel/cpu/ConvolutionLayer.h" #include "kernel/cpu/AvgPoolLayer.h" @@ -27,12 +28,13 @@ #include "kernel/cpu/FullyConnectedLayer.h" #include "kernel/cpu/ReshapeLayer.h" #include "kernel/cpu/SoftMaxLayer.h" +#include "kernel/cpu/PermuteLayer.h" +#include "backend/BackendManager.h" +#include "backend/interface/IConfig.h" -#include "logging.h" +#include "util/logging.h" -#include "support/nnapi/Utils.h" - -#include "logging.h" +#include "util/Utils.h" namespace neurun { @@ -41,25 +43,27 @@ namespace backend namespace cpu { -StageGenerator::StageGenerator(const neurun::graph::operand::Set &operand_ctx, +StageGenerator::StageGenerator(const neurun::model::operand::Set &operand_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder) : _ctx(operand_ctx), _tensor_builder(tensor_builder) { // DO NOTHING } -Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &node) +void 
StageGenerator::visit(const model::operation::Conv2DNode &node) { - const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)}; - const ::neurun::graph::operand::Index ker_index{node.getInputs().at(1)}; - const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)}; + using model::operation::Conv2DNode; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2DNode::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)}; - const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index}; - const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index}; + const auto vstride_index{node.param().vstride_index}; + const auto hstride_index{node.param().hstride_index}; - const ::neurun::graph::operand::Index padding_index{node.param().padding_index}; - const ::neurun::graph::operand::Index activation_index{node.param().activation_index}; + const auto padding_index{node.param().padding_index}; + const auto activation_index{node.param().activation_index}; const PaddingCode padding_type = static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>()); @@ -67,7 +71,7 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n assert((ANEURALNETWORKS_PADDING_SAME == padding_type) || (ANEURALNETWORKS_PADDING_VALID == padding_type)); - ::internal::Stride stride; + util::Stride stride; stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>(); stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>(); @@ -75,28 +79,28 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n // Construct operation parameters struct Param { - int ofm_index; - int ifm_index; - int ker_index; - int bias_index; + model::operand::Index ofm_index; + model::operand::Index ifm_index; + model::operand::Index ker_index; + model::operand::Index bias_index; ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; ::neurun::kernel::cpu::Shape ker_shape; ::neurun::kernel::cpu::Shape bias_shape; - ::internal::Padding padding; - ::internal::Stride stride; + util::Padding padding; + util::Stride stride; FuseCode activation; }; Param param; - param.ofm_index = ofm_index.asInt(); - param.ifm_index = ifm_index.asInt(); - param.ker_index = ker_index.asInt(); - param.bias_index = bias_index.asInt(); + param.ofm_index = ofm_index; + param.ifm_index = ifm_index; + param.ker_index = ker_index; + param.bias_index = bias_index; param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ofm_index)); param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ifm_index)); @@ -105,21 +109,21 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n param.stride = stride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? ::internal::same_padding(_ctx.at(ifm_index).shape().asFeature(), - _ctx.at(ofm_index).shape().asFeature(), stride, - _ctx.at(ker_index).shape().asKernel().W, - _ctx.at(ker_index).shape().asKernel().H) - : ::internal::valid_padding(); + ? 
util::same_padding(_ctx.at(ifm_index).shape().asFeature(), + _ctx.at(ofm_index).shape().asFeature(), stride, + _ctx.at(ker_index).shape().asKernel().W, + _ctx.at(ker_index).shape().asKernel().H) + : util::valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}); - auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}); - auto ker_alloc = tensors->at(::neurun::graph::operand::Index{param.ker_index}); - auto bias_alloc = tensors->at(::neurun::graph::operand::Index{param.bias_index}); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto ofm_alloc = tensors->at(param.ofm_index); + auto ifm_alloc = tensors->at(param.ifm_index); + auto ker_alloc = tensors->at(param.ker_index); + auto bias_alloc = tensors->at(param.bias_index); std::unique_ptr<::neurun::kernel::cpu::ConvolutionLayer> fn{ new ::neurun::kernel::cpu::ConvolutionLayer}; @@ -130,24 +134,22 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n param.stride.vertical, param.activation, ofm_alloc->buffer(), param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node &node) +void StageGenerator::visit(const model::operation::MaxPool2DNode &node) { - VERBOSE(MaxPool2D) << "generate CPU MaxPool2D" << std::endl; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)}; - const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)}; + const auto kh_index{node.param().kh_index}; + const auto kw_index{node.param().kw_index}; - const ::neurun::graph::operand::Index kh_index{node.param().kh_index}; - const ::neurun::graph::operand::Index kw_index{node.param().kw_index}; + const auto vstride_index{node.param().vstride_index}; + const auto hstride_index{node.param().hstride_index}; - const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index}; - const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index}; - - const ::neurun::graph::operand::Index padding_index{node.param().padding_index}; - const ::neurun::graph::operand::Index activation_index{node.param().activation_index}; + const auto padding_index{node.param().padding_index}; + const auto activation_index{node.param().activation_index}; const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>(); const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>(); @@ -161,8 +163,8 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node // Construct operation parameters struct Param { - int ofm_index; - int ifm_index; + model::operand::Index ofm_index; + model::operand::Index ifm_index; uint32_t kw; uint32_t kh; @@ -170,16 +172,16 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; - ::internal::Padding padding; - ::internal::Stride stride; + util::Padding padding; + util::Stride stride; FuseCode activation; }; Param param; - param.ofm_index = ofm_index.asInt(); - param.ifm_index = ifm_index.asInt(); + param.ofm_index = ofm_index; + param.ifm_index = ifm_index; param.kh = kh; param.kw = kw; @@ -192,30 
+194,17 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? ::internal::same_padding(_ctx.at(ifm_index).shape().asFeature(), - _ctx.at(ofm_index).shape().asFeature(), param.stride, kw, kh) - : ::internal::valid_padding(); + ? util::same_padding(_ctx.at(ifm_index).shape().asFeature(), + _ctx.at(ofm_index).shape().asFeature(), param.stride, kw, kh) + : util::valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); - VERBOSE(MaxPool2D) << "IFM_H: " << _ctx.at(ifm_index).shape().asFeature().H << std::endl; - VERBOSE(MaxPool2D) << "IFM_W: " << _ctx.at(ifm_index).shape().asFeature().W << std::endl; - VERBOSE(MaxPool2D) << "OFM_H: " << _ctx.at(ofm_index).shape().asFeature().H << std::endl; - VERBOSE(MaxPool2D) << "OFM_W: " << _ctx.at(ofm_index).shape().asFeature().W << std::endl; - VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_H: " << vstride << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_W: " << hstride << std::endl; - VERBOSE(MaxPool2D) << "PAD(T): " << param.padding.top << std::endl; - VERBOSE(MaxPool2D) << "PAD(B): " << param.padding.bottom << std::endl; - VERBOSE(MaxPool2D) << "PAD(L): " << param.padding.left << std::endl; - VERBOSE(MaxPool2D) << "PAD(R): " << param.padding.right << std::endl; - auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}).get(); - auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto ofm_alloc = tensors->at(param.ofm_index).get(); + auto ifm_alloc = tensors->at(param.ifm_index).get(); std::unique_ptr<::neurun::kernel::cpu::MaxPoolLayer> fn{ new ::neurun::kernel::cpu::MaxPoolLayer}; @@ -226,24 +215,22 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node &node) +void StageGenerator::visit(const model::operation::AvgPool2DNode &node) { - VERBOSE(AvgPool2D) << "generate CPU AvgPool2D" << std::endl; - - const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)}; - const ::neurun::graph::operand::Index kh_index{node.param().kh_index}; - const ::neurun::graph::operand::Index kw_index{node.param().kw_index}; + const auto kh_index{node.param().kh_index}; + const auto kw_index{node.param().kw_index}; - const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index}; - const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index}; + const auto vstride_index{node.param().vstride_index}; + const auto hstride_index{node.param().hstride_index}; - const ::neurun::graph::operand::Index padding_index{node.param().padding_index}; - const ::neurun::graph::operand::Index activation_index{node.param().activation_index}; + const auto padding_index{node.param().padding_index}; + const auto activation_index{node.param().activation_index}; const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>(); const int32_t kw = 
_ctx.at(kw_index).asScalar<int32_t>(); @@ -260,8 +247,8 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node // Construct operation parameters struct Param { - int ofm_index; - int ifm_index; + model::operand::Index ofm_index; + model::operand::Index ifm_index; uint32_t kw; uint32_t kh; @@ -269,16 +256,16 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; - ::internal::Padding padding; - ::internal::Stride stride; + util::Padding padding; + util::Stride stride; FuseCode activation; }; Param param; - param.ofm_index = ofm_index.asInt(); - param.ifm_index = ifm_index.asInt(); + param.ofm_index = ofm_index; + param.ifm_index = ifm_index; param.kh = kh; param.kw = kw; @@ -291,31 +278,17 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? ::internal::same_padding(_ctx.at(ifm_index).shape().asFeature(), - _ctx.at(ofm_index).shape().asFeature(), param.stride, kw, kh) - : ::internal::valid_padding(); + ? util::same_padding(_ctx.at(ifm_index).shape().asFeature(), + _ctx.at(ofm_index).shape().asFeature(), param.stride, kw, kh) + : util::valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); - VERBOSE(AvgPool2D) << "IFM_H: " << _ctx.at(ifm_index).shape().asFeature().H << std::endl; - VERBOSE(AvgPool2D) << "IFM_W: " << _ctx.at(ifm_index).shape().asFeature().W << std::endl; - VERBOSE(AvgPool2D) << "OFM_H: " << _ctx.at(ofm_index).shape().asFeature().H << std::endl; - VERBOSE(AvgPool2D) << "OFM_W: " << _ctx.at(ofm_index).shape().asFeature().W << std::endl; - VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_H: " << vstride << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_W: " << hstride << std::endl; - VERBOSE(AvgPool2D) << "PAD: " << ::nnfw::support::nnapi::to_string(padding_type) << std::endl; - VERBOSE(AvgPool2D) << "PAD(T): " << param.padding.top << std::endl; - VERBOSE(AvgPool2D) << "PAD(B): " << param.padding.bottom << std::endl; - VERBOSE(AvgPool2D) << "PAD(L): " << param.padding.left << std::endl; - VERBOSE(AvgPool2D) << "PAD(R): " << param.padding.right << std::endl; - auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}).get(); - auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto ofm_alloc = tensors->at(param.ofm_index).get(); + auto ifm_alloc = tensors->at(param.ifm_index).get(); std::unique_ptr<::neurun::kernel::cpu::AvgPoolLayer> fn{ new ::neurun::kernel::cpu::AvgPoolLayer}; @@ -326,20 +299,18 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::Concat::Node &node) +void StageGenerator::visit(const model::operation::ConcatNode &node) { - VERBOSE(Concat) << "generate CPU Concat" << std::endl; - - const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index axis_index{node.param().axis_index}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto axis_index{node.param().axis_index}; struct Param { - 
int32_t output_index; - std::vector<int32_t> input_indexes; + model::operand::Index output_index; + std::vector<model::operand::Index> input_indexes; int32_t axis; @@ -349,10 +320,10 @@ Stage StageGenerator::generate(const graph::operation::Concat::Node &node) Param param; - param.output_index = ofm_index.asInt(); + param.output_index = ofm_index; for (const auto &e : node.getInputs()) { - param.input_indexes.emplace_back(e.asInt()); + param.input_indexes.emplace_back(e); } param.axis = _ctx.at(axis_index).asScalar<int32_t>(); @@ -365,14 +336,13 @@ Stage StageGenerator::generate(const graph::operation::Concat::Node &node) auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto output_alloc = tensors->at(param.output_index).get(); std::vector<const uint8_t *> input_buffers; for (auto ifm_ind : param.input_indexes) { - input_buffers.emplace_back( - tensors->at(::neurun::graph::operand::Index{ifm_ind}).get()->buffer()); + input_buffers.emplace_back(tensors->at(ifm_ind).get()->buffer()); } std::unique_ptr<::neurun::kernel::cpu::ConcatLayer> fn{new ::neurun::kernel::cpu::ConcatLayer}; @@ -381,26 +351,26 @@ Stage StageGenerator::generate(const graph::operation::Concat::Node &node) param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &node) +void StageGenerator::visit(const model::operation::FullyConnectedNode &node) { - VERBOSE(FullyConnected) << "generate CPU FullyConnected" << std::endl; + using model::operation::FullyConnectedNode; - const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)}; - const ::neurun::graph::operand::Index weight_index{node.getInputs().at(1)}; - const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)}; - const ::neurun::graph::operand::Index activation_index{node.param().activation_index}; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnectedNode::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnectedNode::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnectedNode::Input::BIAS)}; + const auto activation_index{node.param().activation_index}; // Construct operation parameters struct Param { - int output_index; - int input_index; - int weight_index; - int bias_index; + model::operand::Index output_index; + model::operand::Index input_index; + model::operand::Index weight_index; + model::operand::Index bias_index; ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; @@ -412,10 +382,10 @@ Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &nod Param param; - param.output_index = output_index.asInt(); - param.input_index = input_index.asInt(); - param.weight_index = weight_index.asInt(); - param.bias_index = bias_index.asInt(); + param.output_index = output_index; + param.input_index = input_index; + param.weight_index = weight_index; + param.bias_index = bias_index; param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index)); param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index)); @@ -426,11 +396,11 @@ Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &nod auto tensors = 
_tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get(); - auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get(); - auto weight_alloc = tensors->at(::neurun::graph::operand::Index{param.weight_index}).get(); - auto bias_alloc = tensors->at(::neurun::graph::operand::Index{param.bias_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto output_alloc = tensors->at(param.output_index).get(); + auto input_alloc = tensors->at(param.input_index).get(); + auto weight_alloc = tensors->at(param.weight_index).get(); + auto bias_alloc = tensors->at(param.bias_index).get(); std::unique_ptr<::neurun::kernel::cpu::FullyConnectedLayer> fn{ new ::neurun::kernel::cpu::FullyConnectedLayer}; @@ -440,18 +410,18 @@ Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &nod output_alloc->buffer(), param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::Reshape::Node &node) +void StageGenerator::visit(const model::operation::ReshapeNode &node) { - const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)}; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::ReshapeNode::Input::INPUT)}; struct Param { - int output_index; - int input_index; + model::operand::Index output_index; + model::operand::Index input_index; ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; @@ -459,17 +429,17 @@ Stage StageGenerator::generate(const graph::operation::Reshape::Node &node) Param param; - param.output_index = output_index.asInt(); - param.input_index = input_index.asInt(); + param.output_index = output_index; + param.input_index = input_index; param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index)); param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index)); auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get(); - auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto output_alloc = tensors->at(param.output_index).get(); + auto input_alloc = tensors->at(param.input_index).get(); std::unique_ptr<::neurun::kernel::cpu::ReshapeLayer> fn{ new ::neurun::kernel::cpu::ReshapeLayer}; @@ -477,21 +447,19 @@ Stage StageGenerator::generate(const graph::operation::Reshape::Node &node) fn->configure(input_alloc->buffer(), param.ifm_shape, output_alloc->buffer(), param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::Softmax::Node &node) +void StageGenerator::visit(const model::operation::SoftmaxNode &node) { - VERBOSE(Softmax) << "generate CPU Softmax" << std::endl; - - const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)}; - const ::neurun::graph::operand::Index scale_index{node.param().scale_index}; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::SoftmaxNode::Input::INPUT)}; + const auto scale_index{node.param().scale_index}; 
struct Param { - int output_index; - int input_index; + model::operand::Index output_index; + model::operand::Index input_index; ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; @@ -501,8 +469,8 @@ Stage StageGenerator::generate(const graph::operation::Softmax::Node &node) Param param; - param.output_index = output_index.asInt(); - param.input_index = input_index.asInt(); + param.output_index = output_index; + param.input_index = input_index; param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index)); param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index)); @@ -511,9 +479,9 @@ Stage StageGenerator::generate(const graph::operation::Softmax::Node &node) auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get(); - auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto output_alloc = tensors->at(param.output_index).get(); + auto input_alloc = tensors->at(param.input_index).get(); std::unique_ptr<::neurun::kernel::cpu::SoftMaxLayer> fn{ new ::neurun::kernel::cpu::SoftMaxLayer}; @@ -522,15 +490,58 @@ Stage StageGenerator::generate(const graph::operation::Softmax::Node &node) param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::NOP::Node & /* node */) +void StageGenerator::visit(const model::operation::PermuteNode &node) { - // DO NOTHING - return nullptr; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + using PermuteType = model::operation::PermuteNode::Type; + + struct Param + { + model::operand::Index output_index; + model::operand::Index input_index; + + model::operand::Shape shape; + + PermuteType type{PermuteType::COPY}; + }; + + Param param; + + param.output_index = output_index; + param.input_index = input_index; + + param.shape = _ctx.at(output_index).shape(); + param.type = node.param().type; + + // assert(param.shape == _ctx.at(input_index)); + + const auto &input_li = _ctx.at(input_index).lower_info(); + const auto &output_li = _ctx.at(output_index).lower_info(); + const auto input_backend = input_li->def_backends().getOnlyElement(); + const auto output_backend = output_li->def_backends().getOnlyElement(); + + const auto input_tensors = input_backend->tensor_builder(); + const auto output_tensors = output_backend->tensor_builder(); + + returnStage([input_tensors, output_tensors, param](IExecutionBuilder &builder) { + auto output_object = output_tensors->wrapTensor(param.output_index); + auto input_object = input_tensors->wrapTensor(param.input_index); + + auto fn = nnfw::cpp14::make_unique<::neurun::kernel::cpu::PermuteLayer>(); + + fn->configure(input_object, output_object, param.shape, param.type); + + builder.append(std::move(fn)); + }); } +void StageGenerator::visit(const model::operation::AddNode &) { throw std::runtime_error("NYI"); } + } // namespace neurun } // namespace backend } // namespace cpu diff --git a/runtimes/neurun/src/backend/cpu/StageGenerator.h b/runtimes/neurun/src/backend/cpu/StageGenerator.h index acdd2c8b2..6a0e387da 100644 --- a/runtimes/neurun/src/backend/cpu/StageGenerator.h +++ b/runtimes/neurun/src/backend/cpu/StageGenerator.h @@ -17,9 +17,9 @@ #ifndef __NEURUN_BACKEND_CPU_STAGE_GENERATOR_H__ #define __NEURUN_BACKEND_CPU_STAGE_GENERATOR_H__ -#include 
"backend/IStageGenerator.h" +#include "backend/interface/IStageGenerator.h" -#include "graph/operand/Set.h" +#include "model/operand/Set.h" #include "backend/cpu/operand/Tensor.h" #include "TensorBuilder.h" @@ -33,22 +33,18 @@ namespace cpu class StageGenerator : public IStageGenerator { public: - StageGenerator(const neurun::graph::operand::Set &ctx, + StageGenerator(const neurun::model::operand::Set &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder); virtual std::shared_ptr<ITensorBuilder> tensor_builder() override { return _tensor_builder; } - virtual Stage generate(const graph::operation::Conv2D::Implicit::Node &node) override; - virtual Stage generate(const graph::operation::MaxPool2D::Implicit::Node &node) override; - virtual Stage generate(const graph::operation::AvgPool2D::Implicit::Node &node) override; - virtual Stage generate(const graph::operation::Concat::Node &node) override; - virtual Stage generate(const graph::operation::FullyConnected::Node &node) override; - virtual Stage generate(const graph::operation::Reshape::Node &node) override; - virtual Stage generate(const graph::operation::Softmax::Node &node) override; - virtual Stage generate(const graph::operation::NOP::Node &node) override; +#define OP(InternalName, IsNnApi, NnApiName) \ + virtual void visit(const model::operation::InternalName &) override; +#include "model/operation/Op.lst" +#undef OP private: - const neurun::graph::operand::Set &_ctx; + const neurun::model::operand::Set &_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; }; diff --git a/runtimes/neurun/src/backend/cpu/TensorBuilder.cc b/runtimes/neurun/src/backend/cpu/TensorBuilder.cc index 1b972a830..9c39b9c00 100644 --- a/runtimes/neurun/src/backend/cpu/TensorBuilder.cc +++ b/runtimes/neurun/src/backend/cpu/TensorBuilder.cc @@ -19,6 +19,7 @@ #include <cassert> #include "operand/Object.h" +#include "util/logging.h" namespace neurun { @@ -27,43 +28,93 @@ namespace backend namespace cpu { -TensorBuilder::TensorBuilder() +TensorBuilder::TensorBuilder() : _mem_planner(std::make_shared<FirstFitPlanner>()) { // DO NOTHING } -void TensorBuilder::mark(const ::neurun::graph::operand::Index &ind) +void TensorBuilder::registerTensorInfo(const model::operand::Index &ind, + const compiler::TensorInfo &info) { - assert(_tensors.size() == 0); + _tensor_info_map.insert({ind, info}); +} + +void TensorBuilder::registerSubTensorInfo(const model::operand::Index &, + const compiler::SubTensorInfo &) +{ + // Not supported yet + assert(false); +} + +void TensorBuilder::notifyFirstUse(const model::operand::Index &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto &info = _tensor_info_map.at(ind); - _inds.insert(ind); + const auto size = info.total_size(); + _mem_planner->claim(ind, size); } -void TensorBuilder::prepare(codegen::Plan &plan, - const std::map<int, ::arm_compute::TensorInfo> &tensor_info_ctx) +void TensorBuilder::notifyLastUse(const model::operand::Index &ind) { _mem_planner->release(ind); } + +void TensorBuilder::prepare(void) { assert(_tensors.size() == 0); - for (auto ind_int : _inds) + _mem_alloc = std::make_shared<Allocator>(_mem_planner->capacity()); + assert(_mem_alloc->base()); + + for (auto &mem_plan : _mem_planner->memory_plans()) { - ::neurun::graph::operand::Index ind{ind_int}; - auto tensor = std::make_shared<operand::Tensor>(tensor_info_ctx.at(ind.asInt())); - // TODO Fix allocation here. When Tensor object is created the memory for tensor is also - // allocated, and this must be fixed. 
- plan.operands().set(ind, std::make_shared<operand::Object>(tensor)); + auto ind = mem_plan.first; + auto mem_blk = mem_plan.second; + const auto &info = _tensor_info_map[ind]; + + uint8_t *buffer = _mem_alloc->base() + mem_blk.offset; + auto tensor = std::make_shared<operand::Tensor>(info); + tensor->setBuffer(buffer); _tensors[ind] = tensor; + + VERBOSE(CPU_TENSORBUILDER) << "TENSOR(#" << ind.value() << "): " << static_cast<void *>(buffer) + << std::endl; + + // If we do not make tensor here currently, stages would cause segment fault } } void TensorBuilder::allocate(void) { - assert(_inds.size() == _tensors.size()); - // NOTE For now nothing to do. Allocation is done in prepare stage, which is wrong - // See also: comment in `prepare()` } -std::shared_ptr<operand::Tensor> TensorBuilder::at(const ::neurun::graph::operand::Index &ind) +std::shared_ptr<::neurun::backend::operand::ITensor> +TensorBuilder::tensorAt(const model::operand::Index &ind) +{ + return _tensors.at(ind); +} + +std::shared_ptr<backend::operand::IObject> +TensorBuilder::wrapTensor(const model::operand::Index &ind) +{ + if (_objects.find(ind) != _objects.end()) + { + return _objects.at(ind); + } + else + { + return _objects[ind] = std::make_shared<operand::Object>(_tensors.at(ind)); + } +} + +void TensorBuilder::iterate(const IterateFunction &fn) +{ + for (auto it : _tensors) + { + fn(it.first); + } +} + +std::shared_ptr<operand::Tensor> TensorBuilder::at(const ::neurun::model::operand::Index &ind) { return _tensors.at(ind); } diff --git a/runtimes/neurun/src/backend/cpu/TensorBuilder.h b/runtimes/neurun/src/backend/cpu/TensorBuilder.h index f61a930fe..2715d57f0 100644 --- a/runtimes/neurun/src/backend/cpu/TensorBuilder.h +++ b/runtimes/neurun/src/backend/cpu/TensorBuilder.h @@ -18,11 +18,12 @@ #define __NEURUN_BACKEND_CPU_TENSOR_BUILDER_H__ #include <unordered_map> -#include <unordered_set> -#include "backend/ITensorBuilder.h" +#include "backend/interface/ITensorBuilder.h" #include "backend/cpu/operand/Tensor.h" -#include "graph/operand/Index.h" +#include "backend/cpu/operand/Object.h" +#include "model/operand/Index.h" +#include "MemoryPlanner.h" namespace neurun { @@ -31,23 +32,47 @@ namespace backend namespace cpu { -class Plan; - class TensorBuilder : public ITensorBuilder { public: TensorBuilder(); - virtual void mark(const ::neurun::graph::operand::Index &ind) override; - virtual void prepare(codegen::Plan &plan, - const std::map<int, ::arm_compute::TensorInfo> &tensor_info_ctx) override; + /** + * @brief Register tensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Tensor information + */ + virtual void registerTensorInfo(const model::operand::Index &ind, + const compiler::TensorInfo &info) override; + /** + * @brief Register subtensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Tensor information + */ + virtual void registerSubTensorInfo(const model::operand::Index &ind, + const compiler::SubTensorInfo &info) override; + + virtual void notifyFirstUse(const model::operand::Index &) override; + virtual void notifyLastUse(const model::operand::Index &) override; + + virtual void prepare(void) override; virtual void allocate(void) override; - std::shared_ptr<operand::Tensor> at(const ::neurun::graph::operand::Index &ind); + virtual std::shared_ptr<::neurun::backend::operand::ITensor> + tensorAt(const model::operand::Index &ind) override; + virtual std::shared_ptr<backend::operand::IObject> + wrapTensor(const 
diff --git a/runtimes/neurun/src/backend/cpu/operand/Object.cc b/runtimes/neurun/src/backend/cpu/operand/Object.cc
index 52b63fba7..011747a8c 100644
--- a/runtimes/neurun/src/backend/cpu/operand/Object.cc
+++ b/runtimes/neurun/src/backend/cpu/operand/Object.cc
@@ -25,7 +25,8 @@ namespace cpu
 namespace operand
 {
 
-void Object::access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const
+void Object::access(
+    const std::function<void(::neurun::backend::operand::ITensor &tensor)> &fn) const
 {
   fn(*_tensor);
 }
diff --git a/runtimes/neurun/src/backend/cpu/operand/Object.h b/runtimes/neurun/src/backend/cpu/operand/Object.h
index 08f63f3dc..5ef7c4fbf 100644
--- a/runtimes/neurun/src/backend/cpu/operand/Object.h
+++ b/runtimes/neurun/src/backend/cpu/operand/Object.h
@@ -18,9 +18,9 @@
 #define __NEURUN_BACKEND_CPU_OPERAND_OBJECT_H__
 
 #include <memory>
-#include <arm_compute/core/ITensor.h>
+#include "backend/interface/operand/ITensor.h"
 
-#include "backend/IObject.h"
+#include "backend/interface/operand/IObject.h"
 
 namespace neurun
 {
@@ -37,19 +37,20 @@ public:
   Object() = default;
 
 public:
-  Object(const std::shared_ptr<::arm_compute::ITensor> &tensor) : _tensor{tensor}
+  Object(const std::shared_ptr<::neurun::backend::operand::ITensor> &tensor) : _tensor{tensor}
   {
     // DO NOTHING
   }
 
 public:
-  ::arm_compute::ITensor *ptr(void) const override { return _tensor.get(); }
+  ::neurun::backend::operand::ITensor *ptr(void) const override { return _tensor.get(); }
 
 private:
-  std::shared_ptr<::arm_compute::ITensor> _tensor;
+  std::shared_ptr<::neurun::backend::operand::ITensor> _tensor;
 
 public:
-  void access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const override;
+  void
+  access(const std::function<void(::neurun::backend::operand::ITensor &tensor)> &fn) const override;
 };
 
 } // namespace operand
diff --git a/runtimes/neurun/src/backend/cpu/operand/Tensor.cc b/runtimes/neurun/src/backend/cpu/operand/Tensor.cc
index 0e4f34aac..a5251292e 100644
--- a/runtimes/neurun/src/backend/cpu/operand/Tensor.cc
+++ b/runtimes/neurun/src/backend/cpu/operand/Tensor.cc
@@ -16,6 +16,8 @@
 
 #include "Tensor.h"
 
+#define NO_USE(a) (void)(a)
+
 namespace neurun
 {
 namespace backend
@@ -25,7 +27,11 @@ namespace cpu
 namespace operand
 {
 
-// NO IMPLEMENTATION YET
+size_t Tensor::calcOffset(const neurun::util::feature::Coordinate4D &coords)
+{
+  NO_USE(coords);
+  throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
+}
 
 } // namespace operand
 } // namespace cpu
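With Object::access now taking the backend-neutral ITensor, callers no longer touch ACL types at all. A small illustrative example of reading a tensor through the new signature; `object` and the surrounding setup are assumed, not shown in this diff, and only access()/buffer()/total_size() are confirmed by the patch:

    #include <cstring>
    #include <vector>

    // `object` is assumed to be a cpu::operand::Object wrapping a prepared Tensor.
    std::vector<uint8_t> host_copy;
    object.access([&host_copy](::neurun::backend::operand::ITensor &tensor) {
      // Copy the planned arena region out through the generic interface.
      host_copy.resize(tensor.total_size());
      std::memcpy(host_copy.data(), tensor.buffer(), tensor.total_size());
    });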
diff --git a/runtimes/neurun/src/backend/cpu/operand/Tensor.h b/runtimes/neurun/src/backend/cpu/operand/Tensor.h
index 83a99acf2..7500f890f 100644
--- a/runtimes/neurun/src/backend/cpu/operand/Tensor.h
+++ b/runtimes/neurun/src/backend/cpu/operand/Tensor.h
@@ -17,8 +17,8 @@
 #ifndef __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__
 #define __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__
 
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/core/TensorInfo.h>
+#include "backend/interface/operand/ITensor.h"
+#include "compiler/TensorInfo.h"
 
 namespace neurun
 {
@@ -29,38 +29,40 @@ namespace cpu
 namespace operand
 {
 
-class Tensor : public ::arm_compute::ITensor
+class Tensor : public ::neurun::backend::operand::ITensor
 {
 public:
-  Tensor() = default;
+  Tensor() = delete;
 
-  Tensor(::arm_compute::TensorInfo info) : _info(info)
-  {
-    // TODO Do not allocate buffer here. This tensor is just an abstract Tensor object for cpu.
-    uint32_t size = _info.total_size(); // NOTE This size may not be accurate
-    _buffer = new uint8_t[size];        // NOTE The allocated buffer is never deallocated.
-  }
-
-  Tensor(uint8_t *buffer) : _buffer(buffer)
+public:
+  Tensor(const compiler::TensorInfo &info) : _info(info)
   {
     // DO NOTHING
   }
 
 public:
   void setBuffer(uint8_t *buffer) { _buffer = buffer; }
+  ::neurun::model::operand::DataType data_type() const { return _info.typeInfo().type(); }
 
 public:
-  ::arm_compute::TensorInfo *info() const override
-  {
-    return const_cast<::arm_compute::TensorInfo *>(&_info);
-  }
-
-  ::arm_compute::TensorInfo *info() override { return &_info; }
-
   uint8_t *buffer() const override { return _buffer; }
+  /**
+   * @brief Get dimension by index
+   *
+   * @param index Index to get dimension
+   * @return size_t Dimension at index
+   * @note  N : dimension(0)
+   *        H : dimension(1)
+   *        W : dimension(2)
+   *        C : dimension(3)
+   */
+  size_t dimension(size_t index) const override { return _info.shape().dim(index); }
+  size_t num_dimensions() const override { return _info.shape().dims().size(); }
+  size_t total_size() const override { return _info.total_size(); }
+  size_t calcOffset(const neurun::util::feature::Coordinate4D &coords) override;
 
 private:
-  ::arm_compute::TensorInfo _info;
+  compiler::TensorInfo _info;
   uint8_t *_buffer = nullptr;
 };
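calcOffset() is declared here but, per Tensor.cc above, still throws. Given the N/H/W/C mapping documented on dimension(), a future NHWC implementation could plausibly look like the sketch below; it assumes Coordinate4D exposes n()/h()/w()/c() accessors and that the element size can be recovered from total_size(), neither of which this diff shows:

    // Hypothetical NHWC row-major byte offset; not the actual implementation.
    size_t Tensor::calcOffset(const neurun::util::feature::Coordinate4D &coords)
    {
      const size_t H = dimension(1), W = dimension(2), C = dimension(3);
      // Assumed recoverable element size: total bytes divided by element count.
      const size_t elem_size = total_size() / (dimension(0) * H * W * C);
      const size_t index = ((coords.n() * H + coords.h()) * W + coords.w()) * C + coords.c();
      return index * elem_size;
    }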