diff options
Diffstat (limited to 'runtimes/neurun/core/src/exec')
37 files changed, 4360 insertions, 0 deletions
diff --git a/runtimes/neurun/core/src/exec/DataflowExecutor.cc b/runtimes/neurun/core/src/exec/DataflowExecutor.cc new file mode 100644 index 000000000..75d616131 --- /dev/null +++ b/runtimes/neurun/core/src/exec/DataflowExecutor.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DataflowExecutor.h" + +#include <cassert> + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +int64_t DataflowExecutor::calculateRank(const std::vector<model::Element> &operations) +{ + int64_t rank = 0; + if (!_indexed_ranks) + { + return rank; + } + for (const auto &element : operations) + { + auto it = _indexed_ranks->find(element.index); + if (it == _indexed_ranks->end()) + { + assert(element.node->getName() == "Permute"); + // assign int32_t::max to prevent integer overflow + rank += std::numeric_limits<int32_t>::max(); + } + else + { + rank += it->second; + } + } + return rank; +} + +void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id) +{ + auto &job = _waiting_jobs[id]; + assert(job != nullptr); + auto &subg = _subgraphs->at(_job_to_subgraph[job->index()]); + auto rank = calculateRank(subg.operations()); + _ready_jobs.emplace(rank, std::move(job)); +} + +void DataflowExecutor::notify(uint32_t finished_job_id) +{ + for (auto id : _output_info[finished_job_id]) + { + assert(_input_info[id] > 0); + auto count = --_input_info[id]; + if 
(count == 0) // No dependent jobs left, ready for execution + { + emplaceToReadyJobs(id); + } + } +} +bool DataflowExecutor::noWaitingJobs() +{ + return std::all_of(_waiting_jobs.begin(), _waiting_jobs.end(), + [](const std::unique_ptr<Job> &job) { return job == nullptr; }); +} + +DataflowExecutor::DataflowExecutor(const std::shared_ptr<const model::Model> &model, + std::unique_ptr<model::Subgraphs> subgraphs, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<graph::LowerInfoMap> lower_info, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, + CodeMap &&code_map) + : ExecutorBase{model, std::move(subgraphs), operand_context, std::move(lower_info), + std::move(tensor_mgrs)}, + _code_map{std::move(code_map)} +{ + VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; + + assert(_subgraphs); + // Assign jobs convert SubgraphIndex to job index(uint32_t) + uint32_t next_job_index = 0; + std::unordered_map<model::SubgraphIndex, uint32_t> subgraph_to_job; + _subgraphs->iterate([&](const model::SubgraphIndex &subg_index, const model::Subgraph &) { + VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with SubgraphIndex " + << subg_index.value() << std::endl; + _finished_jobs.emplace_back( + nnfw::cpp14::make_unique<Job>(next_job_index, _code_map.at(subg_index).get(), + _lower_info->operation.at(subg_index)->backend())); + subgraph_to_job[subg_index] = next_job_index++; + }); + + _waiting_jobs.resize(next_job_index); + _output_info.resize(next_job_index); + _initial_input_info.resize(next_job_index, 0); + + _subgraphs->iterate([&](const model::SubgraphIndex &subg_index, const model::Subgraph &subg) { + auto job_index = subgraph_to_job[subg_index]; + for (auto output : subg.getOutputs()) + { + // Update output and input info + _subgraphs->iterate( + [&](const model::SubgraphIndex &subg_cur_index, const model::Subgraph &subg_cur) { + if (subg_cur.getInputs().contains(output)) + { + auto dep_index 
= subgraph_to_job[subg_cur_index]; + ++_initial_input_info[dep_index]; + _output_info[job_index].push_back(dep_index); + } + }); + } + }); + for (const auto &s : subgraph_to_job) + _job_to_subgraph.emplace(s.second, s.first); + + _input_info = _initial_input_info; +} + +void DataflowExecutor::executeImpl() +{ + assert(noWaitingJobs()); + + // Execution setup + _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs + + for (uint32_t i = 0; i < _waiting_jobs.size(); ++i) + { + if (_input_info[i] == 0) + { + emplaceToReadyJobs(i); + } + } + assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs + bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE); + // TODO Fix indentation + { + // Notifiy Execution Begin + for (auto &o : _observers) + { + o->handleBegin(this); + } + } + + while (!_ready_jobs.empty()) + { + auto job = std::move((_ready_jobs.begin())->second); + _ready_jobs.erase(_ready_jobs.begin()); + auto job_index = job->index(); + VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl; + notifyJobBegin(job_index); + if (is_profiling) + job->fn()->runSync(); + else + job->run(); + notifyJobEnd(job_index); + notify(job_index); + _finished_jobs[job_index] = std::move(job); + } + assert(noWaitingJobs()); + + for (auto &o : _observers) + { + o->handleEnd(this); + } + + // Reset input info for the next execution + _input_info = _initial_input_info; +} + +void DataflowExecutor::notifyJobBegin(uint32_t job_index) +{ + auto subgraph_index = _job_to_subgraph[job_index]; + // Workaround - assumes only one operation + auto node = _subgraphs->at(subgraph_index).operations().at(0).node; + const backend::Backend *backend = _lower_info->operation.at(subgraph_index)->backend(); + for (auto &o : _observers) + { + o->handleBegin(this, node, backend); + } +} + +void DataflowExecutor::notifyJobEnd(uint32_t job_index) +{ + auto subgraph_index = _job_to_subgraph[job_index]; + // Workaround - assumes only one operation 
+ auto node = _subgraphs->at(subgraph_index).operations().at(0).node; + const backend::Backend *backend = _lower_info->operation.at(subgraph_index)->backend(); + for (auto &o : _observers) + { + o->handleEnd(this, node, backend); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/DataflowExecutor.h b/runtimes/neurun/core/src/exec/DataflowExecutor.h new file mode 100644 index 000000000..935f9976d --- /dev/null +++ b/runtimes/neurun/core/src/exec/DataflowExecutor.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ +#define __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ + +#include <list> +#include <map> +#include <unordered_map> + +#include "FunctionSequence.h" +#include "Job.h" +#include "model/OperandIndexSequence.h" +#include "model/Index.h" +#include "model/Model.h" +#include "cpp14/memory.h" +#include "exec/ExecutorBase.h" + +namespace neurun +{ +namespace exec +{ + +class DataflowExecutor : public ExecutorBase +{ +public: + using CodeMap = std::unordered_map<model::SubgraphIndex, std::unique_ptr<FunctionSequence>>; + +protected: + virtual void notify(uint32_t finished_job_id); + bool noWaitingJobs(); + +public: + /** + * @brief Constructs a DataflowExecutor object + * + * @param model Model object + * @param operand_context (Only for input/output operand data access) + * @param lower_info LowerInfo object (Only to know input/output operands layout) + * @param code_map Compiled code map + * @param ranks Operation ranks for ordering execution + */ + DataflowExecutor(const std::shared_ptr<const model::Model> &model, + std::unique_ptr<model::Subgraphs> subgraphs, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<graph::LowerInfoMap> lower_info, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map); + + void executeImpl() override; + + void notifyJobEnd(uint32_t job_index); + void notifyJobBegin(uint32_t job_index); + + void addObserver(std::unique_ptr<IExecutionObserver> ref) + { + _observers.emplace_back(std::move(ref)); + }; + void removeObserver(std::unique_ptr<IExecutionObserver> ref) { _observers.remove(ref); }; + +protected: + int64_t calculateRank(const std::vector<model::Element> &operations); + void emplaceToReadyJobs(const uint32_t &id); + +protected: + CodeMap _code_map; + /** + * @brief A vector of finished jobs for current execution + * After a run it has all the jobs of this execution for the next run + */ + std::vector<std::unique_ptr<Job>> _finished_jobs; + 
/** + * @brief A vector of waiting jobs for current execution + * All the jobs are moved from #_finished_jobs to it when start a run + */ + std::vector<std::unique_ptr<Job>> _waiting_jobs; + /** + * @brief Jobs' output info + * Used for notifying after finishing a job + */ + std::vector<std::list<uint32_t>> _output_info; + std::vector<uint32_t> _initial_input_info; + std::vector<uint32_t> _input_info; + /** + * @brief A collection of jobs that are ready for execution + * Jobs in it are ready to be scheduled. + * Ordered by priority from `_indexed_ranks` + */ + std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs; + + /// @brief Which job runs which op and function. + std::unordered_map<uint32_t, model::SubgraphIndex> _job_to_subgraph; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ diff --git a/runtimes/neurun/core/src/exec/Execution.cc b/runtimes/neurun/core/src/exec/Execution.cc new file mode 100644 index 000000000..01114d8c8 --- /dev/null +++ b/runtimes/neurun/core/src/exec/Execution.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "exec/Execution.h" + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +Execution::Execution(const std::shared_ptr<IExecutor> &executor) : _executor{executor} +{ + _io_desc.inputs.resize(_executor->model().inputs.size()); + _io_desc.outputs.resize(_executor->model().outputs.size()); +} + +void Execution::setInput(const model::IOIndex &index, const void *buffer, size_t length) +{ + const auto input_index = model().inputs.at(index); + const auto info = model().operands.at(input_index).info(); + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.inputs.at(index.value()) = nnfw::cpp14::make_unique<InputDesc>(info, buffer, length); +} + +void Execution::setInput(const model::IOIndex &index, const model::TypeInfo &type, + const model::Shape &shape, const void *buffer, size_t length) +{ + const model::OperandInfo info{shape, type}; + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.inputs.at(index.value()) = nnfw::cpp14::make_unique<InputDesc>(info, buffer, length); +} + +void Execution::setOutput(const model::IOIndex &index, void *buffer, size_t length) +{ + const auto output_index = model().outputs.at(index); + const auto info = model().operands.at(output_index).info(); + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.outputs.at(index.value()) = nnfw::cpp14::make_unique<OutputDesc>(info, buffer, length); +} + +void Execution::setOutput(const model::IOIndex &index, const model::TypeInfo &type, + const model::Shape &shape, void *buffer, size_t length) +{ + const model::OperandInfo info{shape, type}; + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.outputs.at(index.value()) = nnfw::cpp14::make_unique<OutputDesc>(info, buffer, length); +} + +void Execution::execute() +{ + VERBOSE(Execution) << "Start execution" << 
std::endl; + + _executor->execute(_io_desc); + + VERBOSE(Execution) << "Execution finished" << std::endl; +} + +void Execution::startExecute() +{ + VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl; + + _exec_thread = nnfw::cpp14::make_unique<std::thread>(&Execution::execute, this); +} + +void Execution::waitFinish() +{ + VERBOSE(Execution) << "Wait to finish execution" << std::endl; + + _exec_thread->join(); +} + +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/ExecutionObservers.cc b/runtimes/neurun/core/src/exec/ExecutionObservers.cc new file mode 100644 index 000000000..e6561fe5c --- /dev/null +++ b/runtimes/neurun/core/src/exec/ExecutionObservers.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "exec/ExecutionObservers.h" +#include "util/logging.h" +#include "model/operation/PermuteNode.h" +#include "exec/IExecutor.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ + +namespace exec +{ + +void ProfileObserver::handleBegin(neurun::exec::IExecutor *, const neurun::model::Operation *, + const neurun::backend::Backend *backend) +{ + _timer = backend->config()->timer(); + if (_timer == nullptr) + throw std::runtime_error("To profile backend timer() method must be implemented"); + _timer->handleBegin(); +} + +void ProfileObserver::handleEnd(IExecutor *exec, const model::Operation *node, + const backend::Backend *backend) +{ + _timer->handleEnd(); + const auto timer_res = _timer->getTime(); + + auto node_name = node->getName(); + VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl; + + // fill ExecTime: + bool is_quantized = exec->model().operands.at(node->getInputs().at(0)).typeInfo().type() == + model::DataType::QUANT8_ASYMM; + + uint32_t size = 0; + for (const auto &input : node->getInputs()) + { + size += exec->model().operands.at(input).info().total_size(); + } + for (const auto &output : node->getOutputs()) + { + size += exec->model().operands.at(output).info().total_size(); + } + if (node_name == "Permute") + { + auto *permute_node = + nnfw::misc::polymorphic_downcast<const model::operation::PermuteNode *>(node); + assert(permute_node != nullptr); + _et->updatePermuteTime(permute_node->param().input_backend_ctx->backend, + permute_node->param().output_backend_ctx->backend, is_quantized, size, + timer_res); + } + else + { + _et->updateOperationExecTime(backend, node_name, is_quantized, size, timer_res); + } +}; + +} // namespace exec + +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/ExecutorBase.cc b/runtimes/neurun/core/src/exec/ExecutorBase.cc new file mode 100644 index 000000000..827d4dc8b --- /dev/null +++ b/runtimes/neurun/core/src/exec/ExecutorBase.cc @@ -0,0 +1,140 @@ 
+/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ExecutorBase.h" +#include "util/logging.h" +namespace neurun +{ +namespace exec +{ + +ExecutorBase::ExecutorBase(const std::shared_ptr<const model::Model> &model, + std::unique_ptr<model::Subgraphs> subgraphs, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<graph::LowerInfoMap> lower_info, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs) + : _observers(), _model{model}, _subgraphs{std::move(subgraphs)}, + _operand_context{operand_context}, _lower_info{std::move(lower_info)}, + _tensor_mgrs{std::move(tensor_mgrs)}, _mutex() +{ + // DO NOTHING +} + +std::unique_ptr<ISource> ExecutorBase::source(const model::IOIndex &index, + const model::TypeInfo &type, const void *buffer, + size_t length) +{ + using ::neurun::model::DataType; + switch (type.type()) + { + case DataType::FLOAT32: + return source<float>(index, buffer, length); + case DataType::INT32: + return source<int32_t>(index, buffer, length); + case DataType::UINT32: + return source<uint32_t>(index, buffer, length); + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + return source<uint8_t>(index, buffer, length); + default: + throw std::runtime_error("Not supported yet"); + } +} + +std::unique_ptr<ISink> ExecutorBase::sink(const model::IOIndex &index, const model::TypeInfo &type, + void *buffer, size_t length) +{ 
+ using ::neurun::model::DataType; + switch (type.type()) + { + case DataType::FLOAT32: + return sink<float>(index, buffer, length); + case DataType::INT32: + return sink<int32_t>(index, buffer, length); + case DataType::UINT32: + return sink<uint32_t>(index, buffer, length); + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + return sink<uint8_t>(index, buffer, length); + default: + throw std::runtime_error("Not supported yet"); + } +} + +void ExecutorBase::execute(const IODescription &desc) +{ + // For thread-safe, use mutex + // TODO: if all used backends on this executor are thread-safe, + // do not need to use mutex (otherwise, use mutex) + std::lock_guard<std::mutex> lock(_mutex); + + std::vector<std::unique_ptr<ISource>> sources{_model->inputs.size()}; + std::vector<std::unique_ptr<ISink>> sinks{_model->outputs.size()}; + + // Set input(s) + for (uint32_t n = 0; n < _model->inputs.size(); ++n) + { + model::IOIndex input_index{n}; + model::OperandIndex index{_model->inputs.at(input_index)}; + + if (desc.inputs.at(n) == nullptr) + { + // Optional input + continue; + } + + const auto operand_li = _lower_info->operand.at(index).get(); + if (operand_li->def_factors().empty()) + { + // This input is not used (i.e. constant, EX. 
reshape's axis) + continue; + } + + const auto &input = *desc.inputs.at(n); + sources.at(n) = source(input_index, input.info.typeInfo(), input.buffer, input.size); + + auto setter = [&](::neurun::backend::operand::ITensor &tensor) { sources.at(n)->push(tensor); }; + + auto object = _operand_context->at(index); + + object->access(setter); + } + + executeImpl(); + + // Get output(s) + for (uint32_t n = 0; n < _model->outputs.size(); ++n) + { + neurun::model::IOIndex output_index{n}; + // Optional output + if (desc.outputs.at(n) == nullptr) + { + continue; + } + const auto &output = *desc.outputs.at(n); + sinks.at(n) = sink(output_index, output.info.typeInfo(), output.buffer, output.size); + + auto getter = [&](::neurun::backend::operand::ITensor &tensor) { sinks.at(n)->pull(tensor); }; + + ::neurun::model::OperandIndex index{_model->outputs.at(output_index)}; + auto object = _operand_context->at(index); + + object->access(getter); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/ExecutorBase.h b/runtimes/neurun/core/src/exec/ExecutorBase.h new file mode 100644 index 000000000..c283e7f61 --- /dev/null +++ b/runtimes/neurun/core/src/exec/ExecutorBase.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_EXECUTOR_BASE_H__ +#define __NEURUN_EXEC_EXECUTOR_BASE_H__ + +#include <mutex> + +#include "Source.h" +#include "exec/ExecutionObservers.h" +#include "Sink.h" +#include "exec/IExecutor.h" +#include "model/Model.h" +#include "graph/LowerInfoMap.h" +#include "backend/IConfig.h" +#include "model/OperandInfo.h" +#include "backend/Backend.h" +#include "compiler/OperandContext.h" +#include "model/Subgraphs.h" +#include "model/Subgraph.h" +#include "backend/ExecTime.h" +#include "exec/IFunction.h" +#include "backend/ITensorManager.h" +#include <list> + +namespace neurun +{ +namespace exec +{ + +class ExecutorBase : public IExecutor +{ +public: + ExecutorBase(const std::shared_ptr<const model::Model> &model, + std::unique_ptr<model::Subgraphs> subgraphs, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<graph::LowerInfoMap> lower_info, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs); + + virtual ~ExecutorBase() = default; + + const model::Model &model() override { return *_model; } + + void execute(const IODescription &desc) final; + + // Used only in Dataflow and Parallel Executors + void setIndexedRanks(std::shared_ptr<model::OperationIndexMap<int64_t>> ranks) final + { + _indexed_ranks = std::move(ranks); + }; + + virtual void executeImpl(void) = 0; + +private: + std::unique_ptr<ISource> source(const model::IOIndex &index, const model::TypeInfo &type, + const void *buffer, size_t length); + std::unique_ptr<ISink> sink(const model::IOIndex &index, const model::TypeInfo &type, + void *buffer, size_t length); + + template <typename T> + std::unique_ptr<ISource> source(const model::IOIndex &index, const void *buffer, size_t length) + { + const auto operand_index = _model->inputs.at(index); + const auto &operand = _model->operands.at(operand_index); + + const auto tensor = _operand_context->at(operand_index)->ptr(); + const auto output_layout = tensor->layout(); + // TODO Set input_layout as frontend 
model's input layout + auto input_layout = model::Layout::NHWC; + if ((input_layout == model::Layout::NHWC) && (output_layout == model::Layout::NCHW)) + { + return nnfw::cpp14::make_unique<PermutateSource<T>>(buffer, length, operand.shape()); + } + // TODO Supports NCHW -> NHWC + + return nnfw::cpp14::make_unique<CopySource<T>>(buffer, length, operand.shape()); + } + + template <typename T> + std::unique_ptr<ISink> sink(const model::IOIndex &index, void *buffer, size_t length) + { + const auto operand_index = _model->outputs.at(index); + const auto &operand = _model->operands.at(operand_index); + const auto tensor = _operand_context->at(operand_index)->ptr(); + const auto input_layout = tensor->layout(); + // TODO Set output_layout as frontend model's output layout + auto output_layout = model::Layout::NHWC; + if ((input_layout == model::Layout::NCHW) && (output_layout == model::Layout::NHWC)) + { + return nnfw::cpp14::make_unique<PermutateSink<T>>(buffer, length, operand.shape()); + } + // TODO Supports NHWC -> NCHW + + return nnfw::cpp14::make_unique<CopySink<T>>(buffer, length, operand.shape()); + } + +protected: + std::list<std::unique_ptr<IExecutionObserver>> _observers; + std::shared_ptr<model::OperationIndexMap<int64_t>> _indexed_ranks; + std::shared_ptr<const model::Model> _model; + std::unique_ptr<model::Subgraphs> _subgraphs; + std::shared_ptr<compiler::OperandContext> _operand_context; + std::unique_ptr<graph::LowerInfoMap> _lower_info; + std::unique_ptr<backend::TensorManagerSet> _tensor_mgrs; + std::mutex _mutex; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_EXECUTOR_BASE_H__ diff --git a/runtimes/neurun/core/src/exec/FunctionSequence.cc b/runtimes/neurun/core/src/exec/FunctionSequence.cc new file mode 100644 index 000000000..00214fcfa --- /dev/null +++ b/runtimes/neurun/core/src/exec/FunctionSequence.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "FunctionSequence.h" + +namespace neurun +{ +namespace exec +{ + +void FunctionSequence::run() +{ + for (const auto &function : _functions) + { + function->run(); + } +} + +void FunctionSequence::runSync() +{ + for (const auto &function : _functions) + { + function->runSync(); + } +} + +void FunctionSequence::prepare() +{ + for (const auto &function : _functions) + { + function->prepare(); + } +} + +void FunctionSequence::append(std::unique_ptr<IFunction> &&function) +{ + _functions.push_back(std::move(function)); +} + +void FunctionSequence::iterate(const std::function<void(IFunction &)> &fn) +{ + for (const auto &func : _functions) + { + fn(*func); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/FunctionSequence.h b/runtimes/neurun/core/src/exec/FunctionSequence.h new file mode 100644 index 000000000..2ba5c0b08 --- /dev/null +++ b/runtimes/neurun/core/src/exec/FunctionSequence.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ +#define __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ + +#include <memory> +#include <vector> +#include <functional> + +#include "exec/IFunction.h" + +namespace neurun +{ +namespace exec +{ + +class FunctionSequence : public IFunction +{ +public: + virtual ~FunctionSequence() = default; + + void run() override; + void runSync() override; + void prepare() override; + + /** + * @brief Appends an IFunction object to the function sequence + * + * @param function IFunction object to be appended + */ + void append(std::unique_ptr<IFunction> &&function); + + void iterate(const std::function<void(IFunction &)> &fn); + +private: + std::vector<std::unique_ptr<IFunction>> _functions; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ diff --git a/runtimes/neurun/core/src/exec/Job.cc b/runtimes/neurun/core/src/exec/Job.cc new file mode 100644 index 000000000..6ce3a84f9 --- /dev/null +++ b/runtimes/neurun/core/src/exec/Job.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Job.h" + +#include <cassert> + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +Job::Job(uint32_t index, IFunction *fn, const backend::Backend *backend) + : _index{index}, _fn{fn}, _backend{backend} +{ +} + +void Job::run() { _fn->run(); } + +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/Job.h b/runtimes/neurun/core/src/exec/Job.h new file mode 100644 index 000000000..108f39e99 --- /dev/null +++ b/runtimes/neurun/core/src/exec/Job.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_JOB_H__ +#define __NEURUN_EXEC_JOB_H__ + +#include <unordered_set> + +#include "exec/IFunction.h" +#include "model/Index.h" +#include "model/OperandIndexSequence.h" +#include "backend/Backend.h" + +namespace neurun +{ +namespace exec +{ + +class Job +{ +public: + /** + * @brief Constructs a Job object + * + * @param index Operation index for this job + * @param fn compiled code to run this job + * @param inputs Input operand list + * @param outputs Output operand list + */ + Job(uint32_t index, IFunction *fn, const backend::Backend *backend); + /** + * @brief Execute the compiled code + */ + void run(); + /** + * @brief Return job index + * + * @return Job index + */ + uint32_t index() const { return _index; } + /** + * @brief Return the function to be executed + * + * @return Pointer of the function + */ + IFunction *fn() { return _fn; } + + /** + * @brief Return the backend + * + * @return Backend + */ + const backend::Backend *backend() { return _backend; } + +private: + uint32_t _index; + IFunction *_fn; + const backend::Backend *_backend; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_JOB_H__ diff --git a/runtimes/neurun/core/src/exec/LinearExecutor.cc b/runtimes/neurun/core/src/exec/LinearExecutor.cc new file mode 100644 index 000000000..35197a257 --- /dev/null +++ b/runtimes/neurun/core/src/exec/LinearExecutor.cc @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
#include "LinearExecutor.h"

namespace neurun
{
namespace exec
{

// Runs the whole pre-built function sequence once. The sequence was built in
// topological order at compile time, so a single pass executes the model.
void LinearExecutor::executeImpl() { _fn_seq->run(); }

} // namespace exec
} // namespace neurun
/**
 * @brief Class to handle execution phase. Simply runs the sequence of
 *        operations that is sorted in topological order
 */
class LinearExecutor final : public ExecutorBase
{
public:
  /**
   * @brief Construct a new LinearExecutor object
   * @param[in] model           Model to execute
   * @param[in] subgraphs       Subgraphs of the model (ownership transferred)
   * @param[in] operand_context Operand context (for input/output operand data access)
   * @param[in] lower_info      Lowering information (ownership transferred)
   * @param[in] tensor_mgrs     Tensor managers owning backend tensors (ownership transferred)
   * @param[in] elements        Linearized (topologically sorted) element sequence
   * @param[in] fn_seq          Compiled function sequence to run, in execution order
   */
  LinearExecutor(const std::shared_ptr<const model::Model> &model,
                 std::unique_ptr<model::Subgraphs> subgraphs,
                 const std::shared_ptr<compiler::OperandContext> &operand_context,
                 std::unique_ptr<graph::LowerInfoMap> lower_info,
                 std::unique_ptr<backend::TensorManagerSet> tensor_mgrs,
                 std::vector<compiler::Linear::Element> &&elements,
                 const std::shared_ptr<exec::FunctionSequence> &fn_seq)
      : ExecutorBase{model, std::move(subgraphs), operand_context, std::move(lower_info),
                     std::move(tensor_mgrs)},
        _fn_seq{fn_seq}, _elements{std::move(elements)}
  {
  }

public:
  void executeImpl(void) override;

private:
  std::shared_ptr<exec::FunctionSequence> _fn_seq; // Compiled code, execution order
  std::vector<compiler::Linear::Element> _elements; // Linearized elements matching _fn_seq
};

} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_EXECUTOR_H_
+ */ + +#include "ParallelExecutor.h" + +#include <cassert> + +#include "util/logging.h" +#include "exec/IFunction.h" + +namespace neurun +{ +namespace exec +{ + +class HookFunction : public IFunction +{ +public: + HookFunction(IFunction *fn, std::function<void()> teardown) : _fn{fn}, _teardown{teardown} {} + +public: + void run() override + { + // TODO Introduce and call setup() function here + _fn->run(); + _teardown(); + } + void runSync() override { throw("runSync is needed just for profiling in Dataflow executor"); } + +private: + IFunction *_fn; + std::function<void()> _teardown; +}; + +void ParallelExecutor::notify(uint32_t finished_job_id) +{ + std::unique_lock<std::mutex> lock{_mu_jobs}; + + DataflowExecutor::notify(finished_job_id); + + lock.unlock(); + _cv_jobs.notify_all(); +} + +ParallelExecutor::ParallelExecutor(const std::shared_ptr<const model::Model> &model, + std::unique_ptr<model::Subgraphs> subgraphs, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<graph::LowerInfoMap> lower_info, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, + CodeMap &&code_map) + : DataflowExecutor{model, + std::move(subgraphs), + operand_context, + std::move(lower_info), + std::move(tensor_mgrs), + std::move(code_map)} +{ + VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; +} + +void ParallelExecutor::executeImpl() +{ + // Init scheduler + // TODO Consider to have distinct backend set in LowerInfoMap + graph::BackendSet backends; + for (auto &itr : _lower_info->operation) + { + backends.add(itr.second->backend()); + } + _scheduler = nnfw::cpp14::make_unique<ParallelScheduler>(backends); + + assert(noWaitingJobs()); + + // Execution setup + _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs + + for (uint32_t i = 0; i < _waiting_jobs.size(); ++i) + { + VERBOSE(ParallelExecutor) << i << ": " << _input_info[i] << std::endl; + if (_input_info[i] == 0) + { + emplaceToReadyJobs(i); + 
} + } + assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs + + VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl; + + while (true) + { + std::unique_lock<std::mutex> lock{_mu_jobs}; + + if (_ready_jobs.empty()) + { + _cv_jobs.wait(lock, [this] { return !_ready_jobs.empty() || noWaitingJobs(); }); + // Check finish condition + if (_ready_jobs.empty() && noWaitingJobs()) + { + break; + } + } + + auto job = std::move(_ready_jobs.begin()->second); + _ready_jobs.erase(_ready_jobs.begin()); + + lock.unlock(); + + VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl; + + auto job_index = job->index(); + auto teardown = [&, job_index]() { notify(job_index); }; + + _scheduler->assign(nnfw::cpp14::make_unique<HookFunction>(job->fn(), teardown), job->backend()); + _finished_jobs[job_index] = std::move(job); + } + + assert(noWaitingJobs()); + + // Wait for all the jobs done + _scheduler->finish(); + + // Reset input info for the next execution + _input_info = _initial_input_info; +} + +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/ParallelExecutor.h b/runtimes/neurun/core/src/exec/ParallelExecutor.h new file mode 100644 index 000000000..7a4673b9c --- /dev/null +++ b/runtimes/neurun/core/src/exec/ParallelExecutor.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#ifndef __NEURUN_EXEC_PARALLEL_EXECUTOR_H__
#define __NEURUN_EXEC_PARALLEL_EXECUTOR_H__

#include <list>
#include <queue>
#include <unordered_map>

#include "FunctionSequence.h"
#include "Job.h"
#include "model/OperandIndexSequence.h"
#include "model/Index.h"
#include "model/Model.h"
#include "cpp14/memory.h"
#include "exec/DataflowExecutor.h"
#include "ParallelScheduler.h"

namespace neurun
{
namespace exec
{

/**
 * @brief Class to execute Graph in parallel
 */
class ParallelExecutor : public DataflowExecutor
{
protected:
  // Overridden to serialize dataflow bookkeeping with the scheduling loop and
  // wake it via the condition variable
  void notify(uint32_t finished_job_id) override;

public:
  /**
   * @brief Constructs a ParallelExecutor object
   *
   * @param model Model object
   * @param subgraphs Subgraphs of the model (ownership transferred)
   * @param operand_context (Only for input/output operand data access)
   * @param lower_info LowerInfo object (Only to know input/output operands layout)
   * @param tensor_mgrs Tensor managers owning backend tensors (ownership transferred)
   * @param code_map Compiled code map
   */
  ParallelExecutor(const std::shared_ptr<const model::Model> &model,
                   std::unique_ptr<model::Subgraphs> subgraphs,
                   const std::shared_ptr<compiler::OperandContext> &operand_context,
                   std::unique_ptr<graph::LowerInfoMap> lower_info,
                   std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map);

  void executeImpl() override;

private:
  std::condition_variable _cv_jobs;             // Signaled by notify() when a job finishes
  std::mutex _mu_jobs;                          // Guards job bookkeeping shared with workers
  std::unique_ptr<ParallelScheduler> _scheduler; // Created per execution in executeImpl()
};

} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_PARALLEL_EXECUTOR_H__
#include "ParallelScheduler.h"

#include <cassert>

#include "cpp14/memory.h"
#include "util/logging.h"

namespace neurun
{
namespace exec
{

// Flags force-finish and wakes all workers so their operator() loops return.
// Note: the destructor itself does not join worker threads; ThreadPool joins
// its threads before its WorkQueue member is destroyed.
WorkQueue::~WorkQueue()
{
  {
    std::unique_lock<std::mutex> lock(_mu);
    _state = State::FORCE_FINISHING;
  }
  _cv.notify_all();
}

// Worker-thread entry point: repeatedly waits for a job or a finish request,
// and runs jobs outside the lock.
void WorkQueue::operator()()
{
  while (true)
  {
    std::unique_ptr<IFunction> fn = nullptr;

    {
      std::unique_lock<std::mutex> lock{_mu};
      // Wake on: forced finish, graceful finish, or work available while online
      _cv.wait(lock, [this] {
        return (_state == State::FORCE_FINISHING) || (_state == State::FINISHING) ||
               (_state == State::ONLINE && !_functions.empty());
      });

      if (_state == State::FORCE_FINISHING)
      {
        // Forced finish must not discard queued work silently
        assert(_functions.empty() && "Terminating with unfinished jobs");
        return;
      }
      else if (_state == State::FINISHING && _functions.empty())
      {
        // Graceful finish: exit once the queue has drained
        return;
      }
      else
      {
        assert(((_state == State::FINISHING) || (_state == State::ONLINE)) && !_functions.empty());
        fn = std::move(_functions.front());
        _functions.pop();
      }
    }

    // Run the job without holding the queue lock
    assert(fn);
    fn->run();
  }
}

// Pushes a job and wakes one waiting worker.
void WorkQueue::enqueue(std::unique_ptr<IFunction> &&fn)
{
  {
    std::unique_lock<std::mutex> lock{_mu};
    _functions.emplace(std::move(fn));
  }
  _cv.notify_one();
}

// Requests immediate termination of workers (queued jobs are expected to be
// empty; see the assert in operator()).
void WorkQueue::terminate()
{
  {
    std::unique_lock<std::mutex> lock{_mu};
    _state = State::FORCE_FINISHING;
  }
  _cv.notify_all();
}

// Requests graceful termination: workers drain the queue, then exit.
void WorkQueue::finish()
{
  {
    std::unique_lock<std::mutex> lock{_mu};
    _state = State::FINISHING;
  }
  _cv.notify_all();
}

// Returns the number of jobs currently queued (a worker may still be running
// a job that was already popped).
uint32_t WorkQueue::numJobsInQueue()
{
  std::unique_lock<std::mutex> lock{_mu};
  return _functions.size();
}

// Starts `num_threads` workers that all pull from the single shared WorkQueue.
ThreadPool::ThreadPool(uint32_t num_threads)
{
  assert(num_threads >= 1);

  for (uint32_t i = 0; i < num_threads; i++)
  {
    // std::ref: all threads share the one _worker instance (no copies)
    _threads.emplace_back(std::ref(_worker));
  }
}

// If finish() was not called, force-terminate and join so threads never
// outlive the pool.
ThreadPool::~ThreadPool()
{
  if (!_threads.empty())
  {
    _worker.terminate();
    join();
  }
}

void ThreadPool::enqueue(std::unique_ptr<IFunction> &&fn) { _worker.enqueue(std::move(fn)); }

uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); }

// Joins and clears all worker threads; clearing lets the destructor detect
// that shutdown already happened.
void ThreadPool::join()
{
  for (auto &thread : _threads)
  {
    thread.join();
  }
  _threads.clear();
}

// Blocks until all queued jobs are executed and all workers have exited.
void ThreadPool::finish()
{
  _worker.finish();
  join();
}

// One single-threaded pool per backend, so jobs for the same backend run
// serially while different backends run concurrently.
ParallelScheduler::ParallelScheduler(const graph::BackendSet &backends)
{
  assert(!backends.empty());

  for (auto backend : backends)
  {
    _thread_pools[backend] = nnfw::cpp14::make_unique<ThreadPool>();
  }
}

// Routes a job to the pool of its backend. The backend must have been in the
// set passed to the constructor.
void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend)
{
  assert(!_thread_pools.empty());

  _thread_pools.at(backend)->enqueue(std::move(fn));
}

// Blocks until every pool has drained its queue and joined its threads.
void ParallelScheduler::finish()
{
  for (auto &itr : _thread_pools)
  {
    itr.second->finish();
  }
}

} // namespace exec
} // namespace neurun
#ifndef __NEURUN_EXEC_PARALLEL_SCHEDULER_H__
#define __NEURUN_EXEC_PARALLEL_SCHEDULER_H__

#include <unordered_map>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <memory>
#include <queue>
#include <vector>
#include <unordered_set>

#include "exec/IFunction.h"
#include "graph/BackendSet.h"

namespace neurun
{
namespace exec
{

// TODO Extract this class to a separate file
/**
 * @brief Thread-safe FIFO of jobs shared by one or more worker threads
 */
class WorkQueue
{
public:
  enum class State
  {
    ONLINE,         // Accepting and running jobs
    FINISHING,      // Drain remaining jobs, then workers exit
    FORCE_FINISHING // Workers exit immediately
  };

public:
  /**
   * @brief Create WorkQueue object
   */
  WorkQueue() = default;
  /**
   * @brief Destroy WorkQueue object
   */
  ~WorkQueue();
  /**
   * @brief Thread entry function
   */
  void operator()();
  /**
   * @brief Push the given Task to the job queue
   *
   * @param fn Function to be executed(a job)
   */
  void enqueue(std::unique_ptr<IFunction> &&fn);
  /**
   * @brief Flag as force-finishing so all the worker threads terminate immediately
   */
  void terminate();
  /**
   * @brief Flag as finishing so worker threads terminate after draining the queue
   */
  void finish();
  /**
   * @brief Count pending jobs. Even if this returns 0, WorkQueue threads may still be
   *        running a job that was already popped
   *
   * @return Number of jobs currently in the queue
   */
  uint32_t numJobsInQueue();

private:
  State _state{State::ONLINE};
  std::queue<std::unique_ptr<IFunction>> _functions; // Pending jobs (FIFO)
  std::mutex _mu;                                    // Guards _state and _functions
  std::condition_variable _cv;                       // Signaled on enqueue/state change
};

// TODO Extract this class to a separate file
/**
 * @brief Fixed-size pool of worker threads sharing one WorkQueue
 */
class ThreadPool
{
public:
  /**
   * @brief Construct ThreadPool object
   *
   * @param num_threads Number of threads
   */
  ThreadPool(uint32_t num_threads = 1);
  /**
   * @brief Destroy ThreadPool object
   */
  ~ThreadPool();
  /**
   * @brief Enqueue a function
   *
   * @param fn A function to be queued
   */
  void enqueue(std::unique_ptr<IFunction> &&fn);
  /**
   * @brief Get number of jobs in worker's queue
   *
   * @return Number of jobs
   */
  uint32_t numJobsInQueue();

  /**
   * @brief Block until all jobs are finished
   */
  void finish();

private:
  // Join and clear all worker threads
  void join();

private:
  WorkQueue _worker;                 // Shared queue; doubles as the thread entry functor
  std::vector<std::thread> _threads; // Worker threads
};

/**
 * @brief Dispatches jobs to per-backend thread pools
 */
class ParallelScheduler
{
public:
  /**
   * @brief Constructs ParallelScheduler object
   *
   * @param backends Backend set
   */
  ParallelScheduler(const graph::BackendSet &backends);
  /**
   * @brief Assign a task to the given backend
   *
   * @param[in] fn Function to be assigned
   * @param[in] backend Target backend
   */
  void assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend);
  /**
   * @brief Block until all jobs are finished
   */
  void finish();

private:
  // One ThreadPool per backend
  std::unordered_map<const backend::Backend *, std::unique_ptr<ThreadPool>> _thread_pools;
};

} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_PARALLEL_SCHEDULER_H__
#ifndef __NEURUN_EXEC_SINK_H__
#define __NEURUN_EXEC_SINK_H__

#include <cassert>

#include "cpp14/memory.h"
#include "util/feature/nhwc/View.h"
#include "util/feature/nchw/View.h"
#include "util/Utils.h"
#include <misc/feature/IndexIterator.h>

namespace neurun
{
namespace exec
{
/**
 * @brief Interface for copying a tensor's contents out into a user buffer
 */
struct ISink
{
  virtual ~ISink() = default;

  /**
   * @brief Copy the given tensor's data into the sink's output buffer
   * @param[in] tensor Tensor to read from
   */
  virtual void pull(::neurun::backend::operand::ITensor &tensor) const = 0;
};

// Create second lever inheritance: the first lever is used as a reference type in use-case places
template <typename T> class ITemplSink : public ISink
{
public:
  /**
   * @brief Construct a sink over a raw output buffer
   * @param[in] output_buffer Destination buffer, interpreted as T* (not owned)
   * @param[in] output_size   Destination size in bytes
   * @param[in] shape         Shape of the tensor being pulled
   * @param[in] copy          true: same-layout copy; false: NCHW-to-NHWC permutation at rank 4
   */
  ITemplSink(void *output_buffer, const size_t &output_size, const model::Shape &shape,
             const bool copy)
      : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
        _shape{shape}, _copy{copy}
  {
  }

protected:
  // Copies tensor contents into _output_buffer, dispatching on rank.
  void pullUnif(neurun::backend::operand::ITensor &tensor) const
  {
    auto input_buffer = tensor.buffer();
    auto rank = _shape.rank();

    // Fast path: when the tensor has no padding and either rank < 4, or
    // rank == 4 in plain-copy mode (`4 + _copy` relies on bool->int promotion),
    // the whole buffer is contiguous and one memcpy suffices
    if (!tensor.has_padding() && rank < 4 + _copy)
    {
      memcpy(_output_buffer, input_buffer, _output_size);
      return;
    }

    switch (rank)
    {
      case 0:
      case 1:
      {
        // Rank 0/1 cannot have inner padding: direct copy
        memcpy(_output_buffer, input_buffer, _output_size);
        break;
      }
      case 2:
      {
        // Copy row by row; calcOffset accounts for any padding in the source
        const int32_t copy_len = _shape.dim(1);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          neurun::util::Coordinates coords{i, 0};
          memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
                 copy_len * sizeof(T));
        }
        break;
      }
      case 3:
      {
        // Copy one innermost run (dim2 elements) per (i, j)
        const int32_t dim1 = _shape.dim(1);
        const int32_t dim2 = _shape.dim(2);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          for (auto j = 0; j < _shape.dim(1); ++j)
          {
            neurun::util::Coordinates coords{i, j, 0};
            memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
                   input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
          }
        }
        break;
      }
      case 4:
      {
        if (_copy)
        {
          // Same-layout copy: innermost run (dim3 elements) per (i, j, k)
          const int32_t dim1 = _shape.dim(1);
          const int32_t dim2 = _shape.dim(2);
          const int32_t dim3 = _shape.dim(3);

          for (auto i = 0; i < _shape.dim(0); ++i)
          {
            for (auto j = 0; j < _shape.dim(1); ++j)
            {
              for (auto k = 0; k < _shape.dim(2); ++k)
              {
                neurun::util::Coordinates coords{i, j, k, 0};
                memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
                       input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
              }
            }
          }
        }
        else
        {
          // Permute element-wise from the NCHW tensor into the NHWC output
          // TODO Support from nhwc to nchw
          auto feature = _shape.asFeature(model::Layout::NHWC);

          const util::feature::nchw::View<T> from{&tensor};
          util::feature::nhwc::View<T> into{feature, _output_buffer, _output_size};

          ::nnfw::misc::feature::iterate(feature)
              << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                   const auto value = from.at(batch, ch, row, col);
                   into.at(batch, ch, row, col) = value;
                 };
        }
        break;
      }
      default:
        throw std::runtime_error("NYI");
        break;
    }
  }

private:
  T *_output_buffer;         // Destination buffer (not owned)
  const size_t _output_size; // Destination size in bytes
  const model::Shape _shape; // Logical shape of the pulled tensor
  const bool _copy;          // true: plain copy; false: layout permutation at rank 4
};

/**
 * @brief Sink that permutes a rank-4 NCHW tensor into an NHWC output buffer
 */
template <typename T> class PermutateSink final : public ITemplSink<T>
{
public:
  PermutateSink(void *output_buffer, const size_t &output_size, const model::Shape &shape)
      : ITemplSink<T>(output_buffer, output_size, shape, false)
  {
  }

public:
  void pull(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSink<T>::pullUnif(tensor);
  }
};

// Only supports NHWC format front-end(NNAPI) now
template <typename T> class CopySink final : public ITemplSink<T>
{
public:
  CopySink(void *output_buffer, const size_t &output_size, const model::Shape &shape)
      : ITemplSink<T>(output_buffer, output_size, shape, true)
  {
  }

public:
  void pull(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSink<T>::pullUnif(tensor);
  }
};

} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_SINK_H__
#ifndef __NEURUN_EXEC_SOURCE_H__
#define __NEURUN_EXEC_SOURCE_H__

#include <cassert>

#include "cpp14/memory.h"
#include "util/feature/nchw/View.h"
#include "util/feature/nhwc/Reader.h"
#include "util/Utils.h"
#include <misc/feature/IndexIterator.h>
#include "model/Shape.h"

namespace neurun
{
namespace exec
{

/**
 * @brief Interface for copying user-provided input data into a tensor
 */
struct ISource
{
  virtual ~ISource() = default;

  /**
   * @brief Copy the source's input buffer into the given tensor
   * @param[in] tensor Tensor to write into
   */
  virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;
};

// Create second lever inheritance: the first lever is used as a reference type in use-case places
template <typename T> class ITemplSource : public ISource
{
public:
  /**
   * @brief Construct a source over a raw input buffer
   * @param[in] input_buffer Source buffer, interpreted as const T* (not owned)
   * @param[in] input_size   Source size in bytes
   * @param[in] shape        Shape of the tensor being pushed
   * @param[in] copy         true: same-layout copy; false: NHWC-to-NCHW permutation at rank 4
   */
  ITemplSource(const void *input_buffer, const size_t &input_size, const model::Shape &shape,
               const bool copy)
      : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
        _shape{shape}, _copy(copy)
  {
  }

  virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;

protected:
  // Copies _input_buffer into the tensor, dispatching on rank.
  void pushUnif(neurun::backend::operand::ITensor &tensor) const
  {
    auto output_buffer = tensor.buffer();
    auto rank = _shape.rank();

    // Fast path: when the tensor has no padding and either rank < 4, or
    // rank == 4 in plain-copy mode (`4 + _copy` relies on bool->int promotion),
    // the whole buffer is contiguous and one memcpy suffices
    if (!tensor.has_padding() && rank < 4 + _copy)
    {
      memcpy(output_buffer, _input_buffer, _input_size);
      return;
    }

    switch (rank)
    {
      case 0:
      case 1:
      {
        // Rank 0/1 cannot have inner padding: direct copy
        memcpy(output_buffer, _input_buffer, _input_size);
        break;
      }
      case 2:
      {
        // Copy row by row; calcOffset accounts for any padding in the destination
        const int32_t copy_len = _shape.dim(1);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          neurun::util::Coordinates coords{i, 0};
          memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
                 copy_len * sizeof(T));
        }
        break;
      }
      case 3:
      {
        // Copy one innermost run (dim2 elements) per (i, j)
        const int32_t dim1 = _shape.dim(1);
        const int32_t dim2 = _shape.dim(2);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          for (auto j = 0; j < _shape.dim(1); ++j)
          {
            neurun::util::Coordinates coords{i, j, 0};
            memcpy(output_buffer + tensor.calcOffset(coords),
                   _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
          }
        }
        break;
      }
      case 4:
      {
        if (_copy)
        {
          // Same-layout copy: innermost run (dim3 elements) per (i, j, k)
          const int32_t dim1 = _shape.dim(1);
          const int32_t dim2 = _shape.dim(2);
          const int32_t dim3 = _shape.dim(3);
          for (auto i = 0; i < _shape.dim(0); ++i)
          {
            for (auto j = 0; j < _shape.dim(1); ++j)
            {
              for (auto k = 0; k < _shape.dim(2); ++k)
              {
                neurun::util::Coordinates coords{i, j, k, 0};
                memcpy(output_buffer + tensor.calcOffset(coords),
                       _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
                       dim3 * sizeof(T));
              }
            }
          }
        }
        else
        {
          // Permute element-wise from the NHWC input buffer into the NCHW tensor
          auto feature = _shape.asFeature(model::Layout::NHWC);

          const util::feature::nhwc::Reader<T> from{feature, _input_buffer, _input_size};
          util::feature::nchw::View<T> into{&tensor};

          ::nnfw::misc::feature::iterate(feature)
              << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                   const auto value = from.at(batch, ch, row, col);
                   into.at(batch, ch, row, col) = value;
                 };
        }

        break;
      }
      default:
        throw std::runtime_error("NYI");
        break;
    }
  }

private:
  const T *_input_buffer;   // Source buffer (not owned)
  const size_t _input_size; // Source size in bytes
  const model::Shape _shape; // Logical shape of the pushed tensor
  const bool _copy;          // true: plain copy; false: layout permutation at rank 4
};

/**
 * @brief Source that permutes an NHWC input buffer into a rank-4 NCHW tensor
 */
template <typename T> class PermutateSource final : public ITemplSource<T>
{
public:
  PermutateSource(const void *input_buffer, const size_t &input_size, const model::Shape &shape)
      : ITemplSource<T>(input_buffer, input_size, shape, false)
  {
  }

public:
  void push(neurun::backend::operand::ITensor &tensor) const override
  {
    // do NHWC_TO_NCHW permutation
    ITemplSource<T>::pushUnif(tensor);
  }
};

// Only supports NHWC format front-end(NNAPI) now
template <typename T> class CopySource final : public ITemplSource<T>
{
public:
  CopySource(const void *input_buffer, const size_t &input_size, const model::Shape &shape)
      : ITemplSource<T>(input_buffer, input_size, shape, true)
  {
  }

public:
  void push(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSource<T>::pushUnif(tensor);
  }
};

} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_SOURCE_H__
__NEURUN_EXEC_SOURCE_H__ diff --git a/runtimes/neurun/core/src/exec/interp/Buffer.h b/runtimes/neurun/core/src/exec/interp/Buffer.h new file mode 100644 index 000000000..3528e0819 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/Buffer.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file Buffer.h + * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class + */ +#ifndef __NEURUN_EXEC_INTERP_BUFFER_H__ +#define __NEURUN_EXEC_INTERP_BUFFER_H__ + +#include <cpp14/memory.h> + +#include "model/Data.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Interface for writable data area + */ +class Buffer : public model::Data +{ +public: + /** + * @brief Return writable pointer for data area + * @return Writable pointer + */ + virtual uint8_t *baseWritable(void) const = 0; +}; + +/** + * @brief Class for internally allocated data area + */ +class InternalBuffer final : public Buffer +{ +public: + InternalBuffer(size_t size) : _base{nnfw::cpp14::make_unique<uint8_t[]>(size)}, _size{size} + { + // DO NOTHING + } + +public: + size_t size(void) const override { return _size; } + const uint8_t *base(void) const override { return _base.get(); } + uint8_t *baseWritable(void) const override { return _base.get(); } + +private: + std::unique_ptr<uint8_t[]> _base; + size_t _size; +}; + +/** + * @brief 
Class for data area from outside + */ +class ExternalBuffer final : public Buffer +{ +public: + ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size} + { + // DO NOTHING + } + +public: + size_t size(void) const override { return _size; } + const uint8_t *base(void) const override { return _base; } + uint8_t *baseWritable(void) const override { return _base; } + +private: + uint8_t *_base; + size_t _size; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_BUFFER_H__ diff --git a/runtimes/neurun/core/src/exec/interp/ExecEnv.h b/runtimes/neurun/core/src/exec/interp/ExecEnv.h new file mode 100644 index 000000000..c270d723c --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/ExecEnv.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file ExecEnv.h + * @brief This file contains ExecEnv to access interpreter tensor and execution status + */ +#ifndef __NEURUN_EXEC_INTERP_EXEC_ENV_H_ +#define __NEURUN_EXEC_INTERP_EXEC_ENV_H_ + +#include <unordered_set> + +#include "model/Model.h" +#include "Tensor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class to gather interpreter execution environment + * Each interpreter instance own execution environment + */ +class ExecEnv +{ +public: + /** + * @brief Construct a new Exec Env object (deleted) + */ + ExecEnv(void) = delete; + /** + * @brief Construct a new ExecEnv object + * @param[in] model Model to execute by interpreter + */ + ExecEnv(const std::shared_ptr<const model::Model> &model) : _model{model} + { + // DO NOTHING + } + +public: + /** + * @brief Return model to execute + * @return Model + */ + const model::Model &model(void) const { return *_model; } + /** + * @brief Assign tensor to environment which have allocated or assigned buffer + * @param[in] index Tensor index + * @param[in] tensor Tensor + */ + void assignTensor(const model::OperandIndex index, std::shared_ptr<ITensor> tensor) + { + assert(tensor->bufferRO() != nullptr); + _tensors.emplace(index, tensor); + } + + /** + * @brief Return tensor pointer in environment + * @param[in] index Tensor index + * @return Tensor pointer + */ + const ITensor *tensorAt(const model::OperandIndex index) const + { + return _tensors.at(index).get(); + } + + /** + * @brief Check environment contains tensor + * @param[in] index Tensor index + * @return @c true if environment contain tensor, otherwise @c false + */ + bool contains(const model::OperandIndex index) const + { + return (_tensors.find(index) != _tensors.end()); + } + + /** + * @brief Allocate tensor using operand info + * @param[in] index Tensor index + * @param[in] info Operand info + * @note If already allocated, just return + * @TODO More smart allocation policy + */ + void 
allocateIfNeeded(const model::OperandIndex index, const model::OperandInfo &info) + { + // already allocated, or constant + if (contains(index)) + { + return; + } + + auto tensor = std::make_shared<Tensor>(info); + tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size())); + assignTensor(index, tensor); + _buffers.insert(index); + } + + /** + * @brief Allocate read-only tensor and share data with other tensor + * @param[in] index Tensor index + * @param[in] info Operand info + * @param[in] index_to_share Tensor index that have data to share + */ + void allocateAndShareIfNeeded(const model::OperandIndex index, const model::OperandInfo &info, + const model::OperandIndex index_to_share) + { + if (!contains(index_to_share)) + { + throw std::runtime_error{"Cannot find tensor to share data"}; + } + + // already allocated + if (contains(index)) + { + return; + } + else + { + auto tensor = std::make_shared<ROTensor>(info); + tensor->setData(tensorAt(index_to_share)->shareData()); + assignTensor(index, tensor); + _buffers.insert(index); + } + } + + /** + * @brief Free buffer if allocated by allocateIfNeed + * @param[in] index Tensor index + * @note If allocated by outside, just return + */ + void freeIfAllocated(const model::OperandIndex index) + { + if (_buffers.find(index) != _buffers.end()) + { + _tensors.at(index)->releaseData(); + } + } + +private: + std::shared_ptr<const model::Model> _model; + // Tensor map to use in interpreter + // It should map tensors that have allocated or assigned buffer pointer + std::unordered_map<model::OperandIndex, std::shared_ptr<ITensor>> _tensors; + // Tensors allocated by allocateIfNeed (buffer) + std::unordered_set<model::OperandIndex> _buffers; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_EXEC_ENV_H_ diff --git a/runtimes/neurun/core/src/exec/interp/ExecManager.cc b/runtimes/neurun/core/src/exec/interp/ExecManager.cc new file mode 100644 index 
#include "ExecManager.h"
#include "ExecEnv.h"
#include "Interpreter.h"

#include "util/logging.h"

#include <cpp14/memory.h>

namespace neurun
{
namespace exec
{
namespace interp
{

/**
 * @brief Execute the whole model with the interpreter
 * @param[in] desc I/O description holding caller-provided input/output buffers
 * @note Builds wrapper tensors over the caller's buffers, registers them (plus all
 *       constants) into a fresh ExecEnv, and hands that environment to one Interpreter run
 */
void ExecManager::execute(const IODescription &desc)
{
  /************************************************************************
   * Prepare execution model (submodel)
     It may execute divided model
     but now consider model inference is done at interpreter
   ***********************************************************************/
  model::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;

  // Wrap each caller input buffer in a read-only tensor (no copy)
  for (uint32_t n = 0; n < _model->inputs.size(); n++)
  {
    neurun::model::IOIndex index{n};
    const auto input_index = _model->inputs.at(index);
    const auto &input = *desc.inputs.at(n);

    auto input_tensor = std::make_shared<ROTensor>(input.info);
    input_tensor->setData(std::make_shared<const model::ExternalData>(
        reinterpret_cast<const uint8_t *>(input.buffer), input.size));
    tensor_map[input_index] = input_tensor;
  }

  // Wrap each caller output buffer in a writable tensor (results land directly there)
  for (uint32_t n = 0; n < _model->outputs.size(); n++)
  {
    neurun::model::IOIndex index{n};
    const auto output_index = _model->outputs.at(index);
    const auto &output = *desc.outputs.at(n);

    auto output_tensor = std::make_shared<Tensor>(output.info);
    output_tensor->setBuffer(
        std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output.buffer), output.size));
    tensor_map[output_index] = output_tensor;
  }

  /************************************************************************
   * Prepare execution environment
     Execution environment will be assigned to invoked interpreter instance
   ***********************************************************************/

  std::unique_ptr<ExecEnv> interp_env = nnfw::cpp14::make_unique<ExecEnv>(_model);

  // Assign input tensor into interpreter execution environment
  for (auto index : _model->inputs)
  {
    if (tensor_map.find(index) != tensor_map.end())
    {
      VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl;
      interp_env->assignTensor(index, tensor_map.at(index));
    }
  }

  // Assign output tensor into interpreter execution environment
  for (auto index : _model->outputs)
  {
    if (tensor_map.find(index) != tensor_map.end())
    {
      VERBOSE(INTERPRETER) << "Assign output tensor. operand index: " << index.value() << std::endl;
      interp_env->assignTensor(index, tensor_map.at(index));
    }
  }

  // Allocate constant tensor: constants reference model data in place (no copy)
  _model->operands.iterate([&](const model::OperandIndex &ind, const model::Operand &obj) {
    if (obj.isConstant())
    {
      VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
                           << std::endl;

      auto const_tensor = std::make_shared<ROTensor>(obj.info());
      // Assume that interpreter's tensor layout is same with model (NHWC)
      const_tensor->setData(
          std::make_shared<model::ExternalData>(obj.data().base(), obj.info().total_size()));
      interp_env->assignTensor(ind, const_tensor);
    }
  });

  /*****************************************************************************
   * Invoke interpreter
   ****************************************************************************/

  // The interpreter takes ownership of the environment for the duration of the run
  Interpreter interp(std::move(interp_env));
  interp.run();

  /*****************************************************************************
   * Invoked interpreter run is finished
   ****************************************************************************/

  // If interpreter execute submodel
  // 1. Get tensor output of submodel into tensor_map to save result
  // 2. Generate new ExecEnv for next interpretation
}

} // namespace interp
} // namespace exec
} // namespace neurun
+ */ + +/** + * @file ExecManager.h + * @brief This file contains ExecManager class\n + * to manage interpreter execution and environment + */ +#ifndef __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ +#define __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ + +#include "model/OperandIndexMap.h" +#include "model/OperationIndexMap.h" +#include "exec/IExecutor.h" +#include "Tensor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class to execute model using interpreter + */ +class ExecManager final : public IExecutor +{ +public: + ExecManager(const std::shared_ptr<const model::Model> &model) : _model{model} + { + // DO NOTHING + } + +public: + /** + * @brief Return graph model + * @return Graph model + */ + const model::Model &model() override { return *_model; } + void setIndexedRanks(std::shared_ptr<model::OperationIndexMap<int64_t>>) override{ + // Not implemented + }; + /** + * @brief Start execution + * @note It should be called after setting input and output buffer + */ + void execute(const IODescription &desc) final; + +private: + std::shared_ptr<const model::Model> _model; + model::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ diff --git a/runtimes/neurun/core/src/exec/interp/Interpreter.cc b/runtimes/neurun/core/src/exec/interp/Interpreter.cc new file mode 100644 index 000000000..81de27c36 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/Interpreter.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
#include "Interpreter.h"

#include <stack>
#include <unordered_set>

#include "Registration.h"

#include "model/OperandIndexMap.h"
#include "util/logging.h"
#include "model/OperationVisitor.h"

namespace neurun
{
namespace exec
{
namespace interp
{

// TODO more structured execution kernel implementation
// TODO use cker for execution
// TODO divide tensor prepare and execution
// TODO introduce memory manager (buffer allocate and free)
/**
 * @brief Visitor that dispatches one operation to its registered kernel
 *        (prepare if present, then invoke), using the shared ExecEnv
 */
class OperationExecutor : model::OperationVisitor
{
public:
#define OP(InternalName, IsNnApi) InternalName,
  enum class NodeName
  {
#include "model/Operations.lst"
  };
#undef OP

public:
  // Register every supported kernel; env is borrowed (not owned)
  OperationExecutor(ExecEnv *env) : _env{env}
  {
    _kernels[NodeName::AddNode] = getAddNode();
    _kernels[NodeName::Conv2DNode] = getConv2DNode();
    _kernels[NodeName::MaxPool2DNode] = getMaxPool2DNode();
    _kernels[NodeName::ConcatNode] = getConcatNode();
    _kernels[NodeName::AvgPool2DNode] = getAvgPool2DNode();
    _kernels[NodeName::FullyConnectedNode] = getFullyConnectedNode();
    _kernels[NodeName::SoftmaxNode] = getSoftMaxNode();
    _kernels[NodeName::ReshapeNode] = getReshapeNode();
    _kernels[NodeName::DepthwiseConv2DNode] = getDepthwiseConvNode();
  }

  // Run the operation at idx: the accept() call routes to the generated visit() below
  void execute(const model::OperationIndex &idx)
  {
    const auto nodeName = _env->model().operations.at(idx).getName();
    VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
                         << " operation (id: " << idx.value() << ")" << std::endl;
    _env->model().operations.at(idx).accept(*this);
  }

private:
// One visit() per operation type: optional prepare (output allocation/shape check),
// then mandatory invoke (actual computation)
#define OP(InternalName, IsNnApi)                                         \
  virtual void visit(const model::operation::InternalName &node) override \
  {                                                                       \
    if (_kernels[NodeName::InternalName]->prepare != nullptr)             \
    {                                                                     \
      _kernels[NodeName::InternalName]->prepare(_env, node);              \
    }                                                                     \
    _kernels[NodeName::InternalName]->invoke(_env, node);                 \
  }
#include "model/Operations.lst"
#undef OP

private:
  ExecEnv *_env;
  std::unordered_map<NodeName, OpKernel *> _kernels;
};

/**
 * @brief Interpret the model: data-driven scheduling over a stack of ready operands.
 *        An operation runs as soon as all of its inputs have been produced.
 */
void Interpreter::run()
{
  VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;

  // operand_stack: operands already prepared (ready to be consumed)
  std::stack<model::OperandIndex> operand_stack;

  // Note: We should push input first, then constant.
  // We use use-def chains to find operations ready for execution,
  // but use-def cannot handle parameters (maybe constant, but not always)
  // Note: If all model inputs are constant, it may not work (depend on tensors' order).
  //       But that scenario may not exist
  for (auto ind : _env->model().inputs)
  {
    VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;

    operand_stack.push(ind);
  }

  _env->model().operands.iterate([&](const model::OperandIndex &ind, const model::Operand &obj) {
    if (obj.isConstant())
    {
      VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;

      operand_stack.push(ind);
    }
  });

  // Execution
  // ready_check: operands already marked available; executed: operations already run
  std::unordered_set<model::OperandIndex> ready_check;
  std::unordered_set<model::OperationIndex> executed;
  OperationExecutor executor{_env.get()};
  while (!operand_stack.empty())
  {
    const auto current_operand_index = operand_stack.top();
    operand_stack.pop();
    VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
                         << " is checked ready to use" << std::endl;

    // Each operand must be marked ready exactly once
    assert(ready_check.find(current_operand_index) == ready_check.end());
    ready_check.insert(current_operand_index);

    // Find prepared operations by scanning uses of the current operand:
    // an operation is ready when every one of its inputs is ready
    std::stack<model::OperationIndex> operation_stack;
    const auto use_operators = _env->model().operands.at(current_operand_index).getUses();
    for (auto use_operator : use_operators.list())
    {
      // Assumption: all parameters are ready to use
      bool operator_ready = true;
      for (auto input_index : _env->model().operations.at(use_operator).getInputs())
      {
        if (ready_check.find(input_index) == ready_check.end())
        {
          operator_ready = false;
          break;
        }
      }

      if (operator_ready)
      {
        VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
        operation_stack.push(use_operator);
      }
    }

    while (!operation_stack.empty())
    {
      const auto current_operation_index = operation_stack.top();
      operation_stack.pop();
      VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
                           << _env->model().operations.at(current_operation_index).getName() << ")"
                           << std::endl;

      // execution
      // 1. Prepare output tensor
      // 2. Call operation kernel
      executor.execute(current_operation_index);
      executed.insert(current_operation_index);

      // 3. Push each output into operand stack
      const auto def_operands = _env->model().operations.at(current_operation_index).getOutputs();
      for (auto def_operand : def_operands)
      {
        VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
                             << std::endl;
        operand_stack.push(def_operand);
      }

      // 4. Free input buffers whose lifetime has ended: an input is dead
      //    once every operation that uses it has been executed
      for (auto input_index : _env->model().operations.at(current_operation_index).getInputs())
      {
        const auto use_operators = _env->model().operands.at(input_index).getUses();
        bool dead_buffer = true;
        for (auto use_operator : use_operators.list())
        {
          if (executed.find(use_operator) == executed.end())
          {
            dead_buffer = false;
            break;
          }
        }

        if (dead_buffer)
        {
          _env->freeIfAllocated(input_index);
        }
      }
    }
  }
}

} // namespace interp
} // namespace exec
} // namespace neurun
+ */ + +/** + * @file Interpreter.h + * @brief This file contains Interpreter class for interpretation + */ +#ifndef __NEURUN_EXEC_INTERP_INTERPRETER_H__ +#define __NEURUN_EXEC_INTERP_INTERPRETER_H__ + +#include "ExecEnv.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class for interpretation + */ +class Interpreter +{ + +public: + /** + * @brief Construct a new Interpreter object (deleted) + */ + Interpreter() = delete; + /** + * @brief Construct a new Interpreter object + * @param[in] env Execution environment variable for interpreter object + */ + Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)} + { + // DO NOTHING + } + +public: + /** + * @brief Run interpreter until there is no operation to execute + */ + void run(); + +private: + std::unique_ptr<ExecEnv> _env; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_INTERPRETER_H__ diff --git a/runtimes/neurun/core/src/exec/interp/Registration.h b/runtimes/neurun/core/src/exec/interp/Registration.h new file mode 100644 index 000000000..37c591f9d --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/Registration.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_INTERP_REGISTRATION_H__ +#define __NEURUN_EXEC_INTERP_REGISTRATION_H__ + +#include "ExecEnv.h" + +#include "model/Operation.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +struct OpKernel +{ + std::function<void(ExecEnv *, const model::Operation &)> prepare; + std::function<void(const ExecEnv *, const model::Operation &)> invoke; +}; + +// Defined in operations/ directory +OpKernel *getAddNode(); +OpKernel *getConv2DNode(); +OpKernel *getMaxPool2DNode(); +OpKernel *getConcatNode(); +OpKernel *getAvgPool2DNode(); +OpKernel *getFullyConnectedNode(); +OpKernel *getSoftMaxNode(); +OpKernel *getDepthwiseConvNode(); +OpKernel *getReshapeNode(); + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_REGISTRATION_H__ diff --git a/runtimes/neurun/core/src/exec/interp/Tensor.cc b/runtimes/neurun/core/src/exec/interp/Tensor.cc new file mode 100644 index 000000000..becb73786 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/Tensor.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Tensor.h" + +#define NO_USE(a) (void)(a) + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +size_t ROTensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + NO_USE(coords); + throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); +} + +size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + NO_USE(coords); + throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); +} + +model::Layout ROTensor::layout() const +{ + // TODO Changes to return frontend layout + return model::Layout::NHWC; +} + +model::Layout Tensor::layout() const +{ + // TODO Changes to return frontend layout + return model::Layout::NHWC; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/interp/Tensor.h b/runtimes/neurun/core/src/exec/interp/Tensor.h new file mode 100644 index 000000000..c8237de1e --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/Tensor.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * @file Tensor.h
 * @brief This file contains ITensor interface, ROTensor class, and Tensor class
 */
#ifndef __NEURUN_EXEC_INTERP_TENSOR_H__
#define __NEURUN_EXEC_INTERP_TENSOR_H__

#include "Buffer.h"

#include "model/OperandInfo.h"
#include "backend/operand/ITensor.h"
#include "model/Layout.h"

namespace neurun
{
namespace exec
{
namespace interp
{

/**
 * @brief Interface to handle Tensor in interpreter
 */
class ITensor : public backend::operand::ITensor
{
public:
  virtual ~ITensor() = default;

public:
  // Writable buffer pointer (read-only tensors throw)
  virtual uint8_t *buffer() const = 0;
  /**
   * @brief Return shared pointer for buffer
   * @return Buffer shared pointer
   */
  virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
  /**
   * @brief Return read-only buffer pointer
   * @return Read-only buffer pointer
   */
  virtual const uint8_t *bufferRO() const = 0;
  /**
   * @brief Return shared pointer for data
   * @return Data shared pointer
   */
  virtual std::shared_ptr<const model::Data> shareData() const = 0;
  /**
   * @brief Set internal/external buffer
   * @param[in] buffer Buffer pointer
   */
  virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
  /**
   * @brief Set data reference (including constant, input)
   * @param[in] data Data pointer
   */
  virtual void setData(std::shared_ptr<const model::Data> data) = 0;
  // Drop the reference to the underlying buffer/data (frees it if last owner)
  virtual void releaseData() = 0;

  virtual size_t total_size() const = 0;
  virtual size_t dimension(size_t index) const = 0;
  virtual size_t num_dimensions() const = 0;
  virtual size_t calcOffset(const util::Coordinates &coords) const = 0;

  virtual bool has_padding() const = 0;
  /**
   * @brief Return data type of tensor
   * @return Data type of tensor
   */
  virtual model::DataType data_type() const = 0;
  /**
   * @brief Return TensorInfo
   * @return TensorInfo
   */
  virtual const model::OperandInfo &tensorInfo() const = 0;
  /**
   * @brief Return number of elements
   * @return Number of elements
   */
  virtual uint64_t num_elements() const = 0;
};

/**
 * @brief Class to handle tensor in interpreter as read-only
 *        Backed by a model::Data reference; write access throws
 */
class ROTensor final : public ITensor
{
public:
  ROTensor() = delete;
  ROTensor(const model::OperandInfo &info) : _info(info)
  {
    // DO NOTHING
  }

public:
  uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; }
  std::shared_ptr<const Buffer> shareBuffer() const override
  {
    throw std::runtime_error{"Read only tensor"};
  }
  const uint8_t *bufferRO() const override { return _data->base(); }
  std::shared_ptr<const model::Data> shareData() const override { return _data; }
  // NOTE A Buffer is stored through the Data interface here — Buffer is usable as
  //      model::Data (see Tensor::shareData below, which returns _buffer as Data)
  void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; }
  void setData(std::shared_ptr<const model::Data> data) override { _data = data; }
  void releaseData() override { _data = nullptr; }

  size_t total_size() const override { return _info.total_size(); }
  size_t dimension(size_t index) const override { return _info.shape().dim(index); }
  size_t num_dimensions() const override { return _info.shape().rank(); }
  size_t calcOffset(const util::Coordinates &coords) const override;
  model::Layout layout() const override;
  bool has_padding() const override { return false; }
  model::DataType data_type() const override { return _info.typeInfo().type(); }
  const model::OperandInfo &tensorInfo() const override { return _info; }
  uint64_t num_elements() const override { return _info.shape().num_elements(); };

private:
  const model::OperandInfo _info;
  std::shared_ptr<const model::Data> _data{nullptr};
};

/**
 * @brief Class to handle tensor in interpreter as writable
 *        Backed by a Buffer (internal or external); setData is rejected
 */
class Tensor final : public ITensor
{
public:
  Tensor() = delete;
  Tensor(const model::OperandInfo &info) : _info(info)
  {
    // DO NOTHING
  }

public:
  uint8_t *buffer() const override { return _buffer->baseWritable(); }
  std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; };
  const uint8_t *bufferRO() const override { return _buffer->base(); }
  std::shared_ptr<const model::Data> shareData() const override { return _buffer; }
  void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; }
  void setData(std::shared_ptr<const model::Data>) override
  {
    throw std::runtime_error{"Passed data may read-only"};
  }
  void releaseData() override { _buffer = nullptr; }

  size_t total_size() const override { return _info.total_size(); }
  size_t dimension(size_t index) const override { return _info.shape().dim(index); }
  size_t num_dimensions() const override { return _info.shape().rank(); }
  size_t calcOffset(const util::Coordinates &coords) const override;
  model::Layout layout() const override;
  bool has_padding() const override { return false; }
  model::DataType data_type() const override { return _info.typeInfo().type(); }
  const model::OperandInfo &tensorInfo() const override { return _info; }
  uint64_t num_elements() const override { return _info.shape().num_elements(); };

private:
  const model::OperandInfo _info;
  std::shared_ptr<const Buffer> _buffer{nullptr};
};

} // namespace interp
} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_INTERP_TENSOR_H__
#include <cker/operation/Add.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "model/operation/AddNode.h"
#include "misc/polymorphic_downcast.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace add
{

/**
 * @brief Validate operand shapes/types and allocate the output tensor
 * @note Broadcasting is NOT supported yet — lhs, rhs, and output must all
 *       share the same rank, dimensions, and data type (enforced via assert)
 */
void prepareAdd(ExecEnv *env, const model::Operation &node)
{
  const auto &add_node = nnfw::misc::polymorphic_downcast<const model::operation::AddNode &>(node);

  const auto lhs_index = node.getInputs().at(add_node.LHS);
  const auto rhs_index = node.getInputs().at(add_node.RHS);
  const auto out_index = node.getOutputs().at(0);

  // Check lhs shape is same with rhs (with broadcast)
  const auto lhs_tensor = env->tensorAt(lhs_index);
  const auto rhs_tensor = env->tensorAt(rhs_index);
  UNUSED_RELEASE(rhs_tensor);

  // Check shape and type lhs is same with rhs
  // TODO Util function to compare TensorInfo
  // TODO Handle broadcasting
  assert(lhs_tensor->data_type() == rhs_tensor->data_type());
  assert(lhs_tensor->num_dimensions() == rhs_tensor->num_dimensions());
  for (uint32_t i = 0; i < lhs_tensor->num_dimensions(); i++)
  {
    assert(lhs_tensor->dimension(i) == rhs_tensor->dimension(i));
  }

  // Output's shape and type should be same with input (don't consider broadcast)
  auto output_info = lhs_tensor->tensorInfo();
  // We can handle already allocated (ex. model output)
  env->allocateIfNeeded(out_index, output_info);

  auto out_tensor = env->tensorAt(out_index);
  UNUSED_RELEASE(out_tensor);

  // Check shape and type lhs is same with output
  // TODO Util function to compare TensorInfo
  // TODO Handle broadcasting
  assert(lhs_tensor->data_type() == out_tensor->data_type());
  assert(lhs_tensor->num_dimensions() == out_tensor->num_dimensions());
  for (uint32_t i = 0; i < lhs_tensor->num_dimensions(); i++)
  {
    assert(lhs_tensor->dimension(i) == out_tensor->dimension(i));
  }
}

// Overload pair: write the activation clamp range into the float or the
// quantized fields of AddParam depending on the element type
inline void setActivationParams(float min, float max, nnfw::cker::AddParam *params)
{
  params->float_activation_min = min;
  params->float_activation_max = max;
}

inline void setActivationParams(int32_t min, int32_t max, nnfw::cker::AddParam *params)
{
  params->quantized_activation_min = min;
  params->quantized_activation_max = max;
}

/**
 * @brief Run element-wise addition via the cker kernel
 * @tparam raw_type Element type (float or int32_t) selecting the overloads above
 */
template <typename raw_type>
void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
            const model::operation::AddNode::Param &param)
{
  const auto lhs_buffer = lhs_tensor->bufferRO();
  const auto rhs_buffer = rhs_tensor->bufferRO();
  auto out_buffer = out_tensor->buffer();

  nnfw::cker::AddParam cker_param;
  raw_type activation_min, activation_max;
  calculateActivationRange(param.activation, &activation_min, &activation_max);
  setActivationParams(activation_min, activation_max, &cker_param);
  const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
  const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
  const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer);
  const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer);
  raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);

  // Calculate
  nnfw::cker::Add(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape, out_ptr);
}

/**
 * @brief Kernel entry point: dispatch by element type (INT32 or FLOAT32 only)
 */
void invokeAdd(const ExecEnv *env, const model::Operation &node)
{
  const auto &add_node = nnfw::misc::polymorphic_downcast<const model::operation::AddNode &>(node);

  const auto lhs_index = node.getInputs().at(add_node.LHS);
  const auto rhs_index = node.getInputs().at(add_node.RHS);
  const auto out_index = node.getOutputs().at(0);
  const auto lhs_tensor = env->tensorAt(lhs_index);
  const auto rhs_tensor = env->tensorAt(rhs_index);
  const auto out_tensor = env->tensorAt(out_index);
  // lhs/rhs/out types already asserted equal in prepareAdd
  const auto data_type = lhs_tensor->data_type();

  if (data_type == model::DataType::INT32)
  {
    invoke<int32_t>(lhs_tensor, rhs_tensor, out_tensor, add_node.param());
  }
  else if (data_type == model::DataType::FLOAT32)
  {
    invoke<float>(lhs_tensor, rhs_tensor, out_tensor, add_node.param());
  }
  else
  {
    throw std::runtime_error{"NYI: Unsupported data type"};
  }
}
} // namespace add

OpKernel *getAddNode()
{
  // Function-local static: one shared kernel instance, pointer stays valid
  static OpKernel kernel = {add::prepareAdd, add::invokeAdd};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
#include <cker/operation/AveragePool.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "model/operation/AvgPool2DNode.h"
#include "util/Utils.h"
#include "util/Padding.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace avgpool2d
{

/**
 * @brief Validate the 4D input and allocate the output tensor,
 *        inferring the output shape when the model left it unspecified
 */
void prepareAvgPool2D(ExecEnv *env, const model::Operation &node)
{
  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  const auto in_tensor = env->tensorAt(in_index);
  UNUSED_RELEASE(in_tensor);

  assert(in_tensor->num_dimensions() == 4);

  const auto output_info = env->model().operands.at(out_index).info();
  if (output_info.total_size() == 0)
  {
    // Handle unspecified output shape: infer it from the input shape and pool params
    const auto &avgpool_node =
        nnfw::misc::polymorphic_downcast<const model::operation::AvgPool2DNode &>(node);
    const auto infered_output_shapes =
        shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
    env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
  }
  else
  {
    env->allocateIfNeeded(out_index, output_info);
  }

  auto out_tensor = env->tensorAt(out_index);
  UNUSED_RELEASE(out_tensor);

  // Handle same ifm & ofm data type only
  assert(in_tensor->data_type() == out_tensor->data_type());
  assert(out_tensor->num_dimensions() == 4);
}

/**
 * @brief Run average pooling via the cker kernel (float only)
 */
void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
            const model::operation::AvgPool2DNode::Param &param)
{
  // TODO Support NCHW frontend
  const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(model::Layout::NHWC);
  const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(model::Layout::NHWC);
  // Padding is derived from the explicit padding mode or computed for SAME/VALID
  const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
                                                     param.stride, param.kw, param.kh);
  // Calculate
  nnfw::cker::AveragePoolParams cker_param;
  calculateActivationRange(param.activation, &cker_param.float_activation_min,
                           &cker_param.float_activation_max);
  cker_param.filter_width = param.kw;
  cker_param.filter_height = param.kh;
  cker_param.padding_values.width = padding.left;
  cker_param.padding_values.height = padding.top;
  cker_param.stride_width = param.stride.horizontal;
  cker_param.stride_height = param.stride.vertical;

  const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
  const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
  float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());

  nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
}

/**
 * @brief Kernel entry point: dispatch by element type (FLOAT32 only)
 */
void invokeAvgPool2D(const ExecEnv *env, const model::Operation &node)
{
  const auto &avgpool_node =
      nnfw::misc::polymorphic_downcast<const model::operation::AvgPool2DNode &>(node);

  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  // Look up tensors already validated/allocated by prepareAvgPool2D
  const auto in_tensor = env->tensorAt(in_index);
  const auto out_tensor = env->tensorAt(out_index);

  const auto data_type = in_tensor->data_type();
  if (data_type == model::DataType::FLOAT32)
  {
    invoke(in_tensor, out_tensor, avgpool_node.param());
  }
  else
  {
    throw std::runtime_error{"NYI: Support float only"};
  }
}
} // namespace avgpool2d

OpKernel *getAvgPool2DNode()
{
  // Function-local static: one shared kernel instance, pointer stays valid
  static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/Concatenation.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "model/operation/ConcatNode.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace concat +{ + +void prepareConcat(ExecEnv *env, const model::Operation &node) +{ + const auto &concat_node = + nnfw::misc::polymorphic_downcast<const model::operation::ConcatNode &>(node); + + const auto first_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto first_tensor = env->tensorAt(first_index); + uint32_t out_axis_dimension = 0; + const int32_t axis_raw = concat_node.param().axis; + const uint32_t axis = (axis_raw < 0) ? 
(axis_raw + first_tensor->num_dimensions()) : axis_raw; + + // All inputs shape should be same except axis dimension + // All inputs type should be same + for (auto input : node.getInputs()) + { + assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions()); + assert(first_tensor->data_type() == env->tensorAt(input)->data_type()); + for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++) + { + if (i == axis) + { + out_axis_dimension += env->tensorAt(input)->dimension(i); + continue; + } + assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i)); + } + } + + // Make output tensor info using first input tensor info, and accumulated axis dimension value + auto out_shape = first_tensor->tensorInfo().shape(); + out_shape.dim(axis) = out_axis_dimension; + env->allocateIfNeeded(out_index, + model::OperandInfo{out_shape, first_tensor->tensorInfo().typeInfo()}); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Output shape should be same with input except axis dimension + // Output type should be same with input + assert(first_tensor->data_type() == out_tensor->data_type()); + for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++) + { + if (i == axis) + { + continue; + } + assert(first_tensor->dimension(i) == out_tensor->dimension(i)); + } +} + +void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis) +{ + const uint32_t count = in_tensors.size(); + + // Calculate + nnfw::cker::ConcatenationParams cker_param; + cker_param.axis = (int8_t)axis; + cker_param.inputs_count = count; + + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + + std::vector<nnfw::cker::Shape> in_shapes; + std::vector<const nnfw::cker::Shape *> in_shape_ptrs; + in_shapes.reserve(count); + in_shape_ptrs.reserve(count); + std::vector<const float *> in_ptrs; + for (uint32_t i = 0; i < count; i++) + { + 
in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape())); + in_shape_ptrs.push_back(&in_shapes[i]); + in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO())); + } + + auto out_buffer = out_tensor->buffer(); + float *out_ptr = reinterpret_cast<float *>(out_buffer); + + nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape, + out_ptr); +} + +void invokeConcat(const ExecEnv *env, const model::Operation &node) +{ + const auto &concat_node = + nnfw::misc::polymorphic_downcast<const model::operation::ConcatNode &>(node); + const int32_t axis_raw = concat_node.param().axis; + + std::vector<const ITensor *> in_tensors; + for (const auto &e : concat_node.getInputs()) + { + in_tensors.emplace_back(env->tensorAt(e)); + } + + const auto out_index = node.getOutputs().at(0); + const auto out_tensor = env->tensorAt(out_index); + const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw; + + const auto data_type = in_tensors[0]->data_type(); + if (data_type == model::DataType::FLOAT32) + { + invoke(in_tensors, out_tensor, axis); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} +} // namespace concat + +OpKernel *getConcatNode() +{ + static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/interp/operations/Conv2D.cc b/runtimes/neurun/core/src/exec/interp/operations/Conv2D.cc new file mode 100644 index 000000000..92f4f6415 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/operations/Conv2D.cc @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/Conv.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "model/operation/Conv2DNode.h" +#include "util/Utils.h" +#include "util/Padding.h" +#include "util/ShapeInference.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace conv2d +{ + +void prepareConv2D(ExecEnv *env, const model::Operation &node) +{ + const auto in_index = node.getInputs().at(model::operation::Conv2DNode::INPUT); + const auto kernel_index = node.getInputs().at(model::operation::Conv2DNode::KERNEL); + const auto bias_index = node.getInputs().at(model::operation::Conv2DNode::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + assert(in_tensor->num_dimensions() == 4); + assert(kernel_tensor->num_dimensions() == 4); + assert(bias_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + const auto output_info = env->model().operands.at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output shape + const auto &conv_node = + nnfw::misc::polymorphic_downcast<const model::operation::Conv2DNode &>(node); + const auto infered_output_shapes = shape_inference::inferConv2DShape( + in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param()); + 
env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const model::operation::Conv2DNode::Param ¶m) +{ + // TODO Support NCHW frontned + const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(model::Layout::NHWC); + const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(model::Layout::NHWC); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = ker_tensor->tensorInfo().shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, ker_width, ker_height); + + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::ConvParams cker_param; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + cker_param.dilation_width_factor = 1; + cker_param.dilation_height_factor = 1; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = 
convertShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); + const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); + const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO()); + float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); + + nnfw::cker::Conv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape, + bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeConv2D(const ExecEnv *env, const model::Operation &node) +{ + const auto &conv_node = + nnfw::misc::polymorphic_downcast<const model::operation::Conv2DNode &>(node); + + const auto ifm_index = node.getInputs().at(model::operation::Conv2DNode::INPUT); + const auto ker_index = node.getInputs().at(model::operation::Conv2DNode::KERNEL); + const auto bias_index = node.getInputs().at(model::operation::Conv2DNode::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto data_type = ifm_tensor->data_type(); + if (data_type == model::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} +} // namespace conv2d + +OpKernel *getConv2DNode() +{ + static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/interp/operations/DepthwiseConv.cc b/runtimes/neurun/core/src/exec/interp/operations/DepthwiseConv.cc new file mode 100644 index 000000000..e1e7c0674 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/operations/DepthwiseConv.cc @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2019 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/DepthwiseConv.h> +#include <misc/polymorphic_downcast.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "model/operation/DepthwiseConv2DNode.h" +#include "util/Padding.h" +#include "util/Utils.h" +#include "util/ShapeInference.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +namespace +{ + +void prepareDepthwiseConv(ExecEnv *env, const model::Operation &node) +{ + const auto in_index = node.getInputs().at(model::operation::DepthwiseConv2DNode::INPUT); + const auto kernel_index = node.getInputs().at(model::operation::DepthwiseConv2DNode::KERNEL); + const auto bias_index = node.getInputs().at(model::operation::DepthwiseConv2DNode::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + assert(in_tensor->num_dimensions() == 4); + assert(kernel_tensor->num_dimensions() == 4); + assert(bias_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + // TODO handle unspecified output shape: + // calculate output shape using ifm shape, kernel shape, padding, stride + const auto output_info = env->model().operands.at(out_index).info(); + if 
(output_info.total_size() == 0) + { + // Handle unspecified output shape + const auto &depth_conv_node = + nnfw::misc::polymorphic_downcast<const model::operation::DepthwiseConv2DNode &>(node); + const auto infered_output_shapes = shape_inference::inferDepthwiseConv2DShape( + in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), + depth_conv_node.param()); + env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const model::operation::DepthwiseConv2DNode::Param ¶m) +{ + // TODO Support NCHW frontend + const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(model::Layout::NHWC); + const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(model::Layout::NHWC); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
+ const auto &ker_shape = ker_tensor->tensorInfo().shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, ker_width, ker_height); + + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::DepthwiseConvParams cker_param; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.depth_multiplier = param.multiplier; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + cker_param.dilation_width_factor = 1; + cker_param.dilation_height_factor = 1; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); + const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); + const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO()); + float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); + + nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeDepthwiseConv(const ExecEnv *env, const model::Operation &node) +{ + const auto &conv_node = static_cast<const model::operation::DepthwiseConv2DNode &>(node); + + const auto ifm_index = node.getInputs().at(model::operation::DepthwiseConv2DNode::INPUT); + const auto ker_index = 
node.getInputs().at(model::operation::DepthwiseConv2DNode::KERNEL); + const auto bias_index = node.getInputs().at(model::operation::DepthwiseConv2DNode::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto data_type = ifm_tensor->data_type(); + if (data_type == model::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} + +} // namespace + +OpKernel *getDepthwiseConvNode() +{ + static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/interp/operations/FullyConnected.cc b/runtimes/neurun/core/src/exec/interp/operations/FullyConnected.cc new file mode 100644 index 000000000..466c220b1 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/operations/FullyConnected.cc @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/FullyConnected.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "model/operation/FullyConnectedNode.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace fc +{ + +void prepareFC(ExecEnv *env, const model::Operation &node) +{ + const auto in_index = node.getInputs().at(model::operation::FullyConnectedNode::INPUT); + const auto kernel_index = node.getInputs().at(model::operation::FullyConnectedNode::WEIGHT); + const auto bias_index = node.getInputs().at(model::operation::FullyConnectedNode::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + assert(in_tensor->num_dimensions() >= 2); + assert(kernel_tensor->num_dimensions() == 2); + assert(bias_tensor->num_dimensions() == 1); + + const auto input_size_with_batch = in_tensor->num_elements(); + const auto num_units = kernel_tensor->dimension(0); + const auto input_size = kernel_tensor->dimension(1); + const auto batch_size = input_size_with_batch / input_size; + assert(input_size_with_batch % input_size == 0); + assert(num_units == bias_tensor->dimension(0)); + + // Make output tensor info + model::Shape output_shape(2); + output_shape.dim(0) = batch_size; + output_shape.dim(1) = num_units; + const model::OperandInfo out_info{output_shape, in_tensor->tensorInfo().typeInfo()}; + env->allocateIfNeeded(out_index, out_info); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 2); + assert(out_tensor->dimension(0) == batch_size); + assert(out_tensor->dimension(1) 
== num_units); +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const model::operation::FullyConnectedNode::Param ¶m) +{ + const auto ifm_buffer = ifm_tensor->bufferRO(); + const auto ker_buffer = ker_tensor->bufferRO(); + const auto bias_buffer = bias_tensor->bufferRO(); + auto ofm_buffer = ofm_tensor->buffer(); + + // Calculate + nnfw::cker::FullyConnectedParams cker_param; + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + const auto cker_ifm_shape = convertExtendShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertExtendShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertExtendShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertExtendShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer); + const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer); + const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer); + float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer); + + nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeFC(const ExecEnv *env, const model::Operation &node) +{ + const auto &conv_node = + nnfw::misc::polymorphic_downcast<const model::operation::FullyConnectedNode &>(node); + + const auto ifm_index = node.getInputs().at(model::operation::FullyConnectedNode::INPUT); + const auto ker_index = node.getInputs().at(model::operation::FullyConnectedNode::WEIGHT); + const auto bias_index = node.getInputs().at(model::operation::FullyConnectedNode::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = 
env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto data_type = ifm_tensor->data_type(); + if (data_type == model::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float only"}; + } +} +} // namespace fc + +OpKernel *getFullyConnectedNode() +{ + static OpKernel kernel = {fc::prepareFC, fc::invokeFC}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/interp/operations/MaxPool2D.cc b/runtimes/neurun/core/src/exec/interp/operations/MaxPool2D.cc new file mode 100644 index 000000000..e53fa1473 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/operations/MaxPool2D.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/MaxPool.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "model/operation/MaxPool2DNode.h" +#include "util/Utils.h" +#include "util/Padding.h" +#include "util/ShapeInference.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareMaxPool2D(ExecEnv *env, const model::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + + assert(in_tensor->num_dimensions() == 4); + UNUSED_RELEASE(in_tensor); + + const auto output_info = env->model().operands.at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output shape + const auto &maxpool_node = + nnfw::misc::polymorphic_downcast<const model::operation::MaxPool2DNode &>(node); + const auto infered_output_shapes = + shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param()); + env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *in_tensor, const ITensor *out_tensor, + const model::operation::MaxPool2DNode::Param ¶m) +{ + // TODO support NCHW frontend + const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(model::Layout::NHWC); + const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(model::Layout::NHWC); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, param.kw, param.kh); + // Calculate + nnfw::cker::MaxPoolParams cker_param; + calculateActivationRange(param.activation, 
&cker_param.float_activation_min, + &cker_param.float_activation_max); + cker_param.filter_width = param.kw; + cker_param.filter_height = param.kh; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + + nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr); +} + +void invokeMaxPool2D(const ExecEnv *env, const model::Operation &node) +{ + const auto &maxpool_node = + nnfw::misc::polymorphic_downcast<const model::operation::MaxPool2DNode &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + const auto data_type = in_tensor->data_type(); + if (data_type == model::DataType::FLOAT32) + { + invoke(in_tensor, out_tensor, maxpool_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} +} // namespace + +OpKernel *getMaxPool2DNode() +{ + static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/interp/operations/OperationUtil.h b/runtimes/neurun/core/src/exec/interp/operations/OperationUtil.h new file mode 100644 index 000000000..4d2b4e1d8 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/operations/OperationUtil.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ +#define __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ + +#include "model/Shape.h" +#include "model/InternalType.h" + +#include <cker/Shape.h> + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +inline nnfw::cker::Shape convertShape(const model::Shape &shape) +{ + auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); + + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = dimensions[i]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +inline nnfw::cker::Shape convertExtendShape(const model::Shape &shape) +{ + auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); + + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + uint32_t start = 4 - dimensions.size(); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i < start) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = dimensions[i - start]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +template <typename T> +void calculateActivationRange(model::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == model::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if 
(activation == model::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == model::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == model::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + throw std::runtime_error{"Unsupported activation type"}; + } +} + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ diff --git a/runtimes/neurun/core/src/exec/interp/operations/Reshape.cc b/runtimes/neurun/core/src/exec/interp/operations/Reshape.cc new file mode 100644 index 000000000..a45c3b3f2 --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/operations/Reshape.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "exec/interp/Registration.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepare(ExecEnv *env, const model::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + // Unspecified shape is not supported in operation node spec now + const auto output_info = env->model().operands.at(out_index).info(); + env->allocateAndShareIfNeeded(out_index, output_info, in_index); + + assert(output_info.total_size() == env->model().operands.at(in_index).info().total_size()); +} + +void invoke(const ExecEnv *env, const model::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO()) + { + // Same data + return; + } + + const auto output_info = env->model().operands.at(out_index).info(); + memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(), + output_info.total_size()); +} + +} // namespace {anonymous} + +OpKernel *getReshapeNode() +{ + static OpKernel kernel = {prepare, invoke}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtimes/neurun/core/src/exec/interp/operations/SoftMax.cc b/runtimes/neurun/core/src/exec/interp/operations/SoftMax.cc new file mode 100644 index 000000000..07865969b --- /dev/null +++ b/runtimes/neurun/core/src/exec/interp/operations/SoftMax.cc @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/SoftMax.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "model/operation/SoftmaxNode.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta, + float *out) +{ + assert(input_size > 0); + + // For each batch + for (int b = 0; b < batch_size; b++) + { + // Find the max coeff. + float max_coeff = in[0]; + for (int i = 1; i < input_size; i++) + { + if (in[i] > max_coeff) + max_coeff = in[i]; + } + + // Compute the normalized sum of exps. + float exp_sum = 0.0; + for (int i = 0; i < input_size; i++) + { + out[i] = std::exp((in[i] - max_coeff) * beta); + exp_sum += out[i]; + } + + // Divide by the sum of exps. + float reciprocal_sum_exp = 1.f / exp_sum; + for (int i = 0; i < input_size; i++) + { + out[i] *= reciprocal_sum_exp; + } + + // Advance in and out pointers for the next batch. 
+ in += input_size; + out += input_size; + } +} + +void prepareSoftMax(ExecEnv *env, const model::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + UNUSED_RELEASE(in_tensor); + + assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2)); + + // Output shape should be same with input + // Output type is pre-defined in model + const auto output_shape = env->model().operands.at(in_index).info().shape(); + const auto output_type = env->model().operands.at(out_index).info().typeInfo(); + + const model::OperandInfo output_info{output_shape, output_type}; + env->allocateIfNeeded(out_index, output_info); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Check output shape is same with input + assert(out_tensor->num_dimensions() == out_tensor->num_dimensions()); + for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++) + { + assert(in_tensor->dimension(i) == out_tensor->dimension(i)); + } +} + +void invoke(const ITensor *in_tensor, const ITensor *out_tensor, + const model::operation::SoftmaxNode::Param ¶m) +{ + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + + float beta = param.beta; + + if (in_tensor->num_dimensions() == 2) + { + uint32_t batch_size = in_tensor->dimension(0); + uint32_t input_size = in_tensor->dimension(1); + + Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr); + } + else if (in_tensor->num_dimensions() == 4) + { + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + + nnfw::cker::SoftmaxParams cker_param; + cker_param.beta = beta; + + nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr); + } + else + { + throw std::runtime_error{"Unsuported input dimension: 
support 2D or 4D"}; + } +} + +void invokeSoftMax(const ExecEnv *env, const model::Operation &node) +{ + const auto &softmax_node = + nnfw::misc::polymorphic_downcast<const model::operation::SoftmaxNode &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + const auto in_data_type = in_tensor->data_type(); + const auto out_data_type = out_tensor->data_type(); + if ((in_data_type == model::DataType::FLOAT32) && (out_data_type == model::DataType::FLOAT32)) + { + invoke(in_tensor, out_tensor, softmax_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} + +} // namespace + +OpKernel *getSoftMaxNode() +{ + static OpKernel kernel = {prepareSoftMax, invokeSoftMax}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun |