diff options
Diffstat (limited to 'runtime/neurun/core/src/exec')
49 files changed, 5603 insertions, 0 deletions
diff --git a/runtime/neurun/core/src/exec/DataflowExecutor.cc b/runtime/neurun/core/src/exec/DataflowExecutor.cc new file mode 100644 index 000000000..e22d41031 --- /dev/null +++ b/runtime/neurun/core/src/exec/DataflowExecutor.cc @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DataflowExecutor.h" + +#include <cassert> + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +int64_t DataflowExecutor::calculateRank(const std::vector<ir::Element> &operations) +{ + int64_t rank = 0; + if (!_indexed_ranks) + { + return rank; + } + for (const auto &element : operations) + { + auto it = _indexed_ranks->find(element.index); + if (it == _indexed_ranks->end()) + { + assert(element.node->opcode() == ir::OpCode::Permute); + // assign int32_t::max to prevent integer overflow + rank += std::numeric_limits<int32_t>::max(); + } + else + { + rank += it->second; + } + } + return rank; +} + +void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id) +{ + auto &job = _waiting_jobs[id]; + assert(job != nullptr); + auto &subg = _graph.subgraphs()->at(_job_to_op_seq[job->index()]); + auto rank = calculateRank(subg.operations()); + _ready_jobs.emplace(rank, std::move(job)); +} + +void DataflowExecutor::notify(uint32_t finished_job_id) +{ + for (auto id : _output_info[finished_job_id]) + { + assert(_input_info[id] > 0); + auto count = 
--_input_info[id]; + if (count == 0) // No dependent jobs left, ready for execution + { + emplaceToReadyJobs(id); + } + } +} +bool DataflowExecutor::noWaitingJobs() +{ + return std::all_of(_waiting_jobs.begin(), _waiting_jobs.end(), + [](const std::unique_ptr<Job> &job) { return job == nullptr; }); +} + +DataflowExecutor::DataflowExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, + CodeMap &&code_map) + : ExecutorBase{graph, operand_context, std::move(tensor_mgrs)}, _code_map{std::move(code_map)} +{ + VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; + + const ir::Subgraphs *subgraphs = _graph.subgraphs(); + // Assign jobs convert SubgraphIndex to job index(uint32_t) + uint32_t next_job_index = 0; + std::unordered_map<ir::SubgraphIndex, uint32_t> subgraph_to_job; + subgraphs->iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &) { + VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with SubgraphIndex " + << subg_index.value() << std::endl; + _finished_jobs.emplace_back( + nnfw::cpp14::make_unique<Job>(next_job_index, _code_map.at(subg_index).get())); + subgraph_to_job[subg_index] = next_job_index++; + }); + + _waiting_jobs.resize(next_job_index); + _output_info.resize(next_job_index); + _initial_input_info.resize(next_job_index, 0); + + subgraphs->iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &subg) { + auto job_index = subgraph_to_job[subg_index]; + for (auto output : subg.getOutputs()) + { + // Update output and input info + subgraphs->iterate( + [&](const ir::SubgraphIndex &subg_cur_index, const ir::OpSequence &subg_cur) { + if (subg_cur.getInputs().contains(output)) + { + auto dep_index = subgraph_to_job[subg_cur_index]; + ++_initial_input_info[dep_index]; + _output_info[job_index].push_back(dep_index); + } + }); + } + }); + for (const auto &s : subgraph_to_job) + 
_job_to_op_seq.emplace(s.second, s.first); + + _input_info = _initial_input_info; +} + +void DataflowExecutor::executeImpl() +{ + assert(noWaitingJobs()); + + // Execution setup + _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs + + for (uint32_t i = 0; i < _waiting_jobs.size(); ++i) + { + if (_input_info[i] == 0) + { + emplaceToReadyJobs(i); + } + } + assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs + bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE); + + _subject.notifyModelBegin(this); + + while (!_ready_jobs.empty()) + { + auto job = std::move((_ready_jobs.begin())->second); + _ready_jobs.erase(_ready_jobs.begin()); + auto job_index = job->index(); + VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl; + + auto subgraph_index = _job_to_op_seq[job_index]; + auto op_seq = &_graph.subgraphs()->at(subgraph_index); + const backend::Backend *backend = + _graph.getLowerInfo()->operation.at(subgraph_index)->backend(); + + _subject.notifyJobBegin(this, op_seq, backend); + + if (is_profiling) + job->fn()->runSync(); + else + job->run(); + + _subject.notifyJobEnd(this, op_seq, backend); + notify(job_index); + _finished_jobs[job_index] = std::move(job); + } + assert(noWaitingJobs()); + + _subject.notifyModelEnd(this); + + // Reset input info for the next execution + _input_info = _initial_input_info; +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/DataflowExecutor.h b/runtime/neurun/core/src/exec/DataflowExecutor.h new file mode 100644 index 000000000..6c12093fd --- /dev/null +++ b/runtime/neurun/core/src/exec/DataflowExecutor.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ +#define __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ + +#include <list> +#include <map> +#include <unordered_map> + +#include "FunctionSequence.h" +#include "Job.h" +#include "ir/OperandIndexSequence.h" +#include "ir/Index.h" +#include "cpp14/memory.h" +#include "exec/ExecutorBase.h" + +namespace neurun +{ +namespace exec +{ + +class DataflowExecutor : public ExecutorBase +{ +public: + using CodeMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<FunctionSequence>>; + +protected: + virtual void notify(uint32_t finished_job_id); + bool noWaitingJobs(); + +public: + /** + * @brief Constructs a DataflowExecutor object + * + * @param graph Graph object + * @param operand_context (Only for input/output operand data access) + * @param code_map Compiled code map + * @param ranks Operation ranks for ordering execution + */ + DataflowExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map); + + void executeImpl() override; + +protected: + int64_t calculateRank(const std::vector<ir::Element> &operations); + void emplaceToReadyJobs(const uint32_t &id); + +protected: + CodeMap _code_map; + /** + * @brief A vector of finished jobs for current execution + * After a run it has all the jobs of this execution for the next run + */ + std::vector<std::unique_ptr<Job>> _finished_jobs; + /** + * @brief A vector of waiting jobs for current execution + * All the jobs are moved from #_finished_jobs to 
it when start a run + */ + std::vector<std::unique_ptr<Job>> _waiting_jobs; + /** + * @brief Jobs' output info + * Used for notifying after finishing a job + */ + std::vector<std::list<uint32_t>> _output_info; + std::vector<uint32_t> _initial_input_info; + std::vector<uint32_t> _input_info; + /** + * @brief A collection of jobs that are ready for execution + * Jobs in it are ready to be scheduled. + * Ordered by priority from `_indexed_ranks` + */ + std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs; + + /// @brief Which job runs which op and function. + std::unordered_map<uint32_t, ir::SubgraphIndex> _job_to_op_seq; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ diff --git a/runtime/neurun/core/src/exec/Execution.cc b/runtime/neurun/core/src/exec/Execution.cc new file mode 100644 index 000000000..bc7bbd160 --- /dev/null +++ b/runtime/neurun/core/src/exec/Execution.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "exec/Execution.h" + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +Execution::Execution(const std::shared_ptr<IExecutor> &executor) : _executor{executor} +{ + _io_desc.inputs.resize(_executor->graph().getInputs().size()); + _io_desc.outputs.resize(_executor->graph().getOutputs().size()); +} + +// TODO Remove default parameter +void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length, + ir::Layout layout) +{ + const auto input_index = graph().getInputs().at(index); + const auto info = graph().operands().at(input_index).info(); + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.inputs.at(index.value()) = + nnfw::cpp14::make_unique<InputDesc>(info, buffer, length, layout); +} + +// TODO Remove default parameter +void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape, + const void *buffer, size_t length, ir::Layout layout) +{ + const ir::OperandInfo info{shape, type}; + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.inputs.at(index.value()) = + nnfw::cpp14::make_unique<InputDesc>(info, buffer, length, layout); +} + +// TODO Remove default parameter +void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout) +{ + const auto output_index = graph().getOutputs().at(index); + const auto info = graph().operands().at(output_index).info(); + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.outputs.at(index.value()) = + nnfw::cpp14::make_unique<OutputDesc>(info, buffer, length, layout); +} + +// TODO Remove default parameter +void Execution::setOutput(const ir::IOIndex &index, const ir::TypeInfo &type, + const ir::Shape &shape, void *buffer, size_t length, ir::Layout layout) +{ + const ir::OperandInfo info{shape, type}; + + if (length < info.total_size()) + { + 
throw std::runtime_error{"Too small length"}; + } + + _io_desc.outputs.at(index.value()) = + nnfw::cpp14::make_unique<OutputDesc>(info, buffer, length, layout); +} + +void Execution::setInputLayout(const ir::IOIndex &index, ir::Layout layout) +{ + const auto &input_desc = _io_desc.inputs.at(index.value()); + _io_desc.inputs.at(index.value()) = nnfw::cpp14::make_unique<InputDesc>( + input_desc->info, input_desc->buffer, input_desc->size, layout); +} + +void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout) +{ + const auto &output_desc = _io_desc.outputs.at(index.value()); + _io_desc.outputs.at(index.value()) = nnfw::cpp14::make_unique<OutputDesc>( + output_desc->info, output_desc->buffer, output_desc->size, layout); +} + +void Execution::execute() +{ + VERBOSE(Execution) << "Start execution" << std::endl; + + _executor->execute(_io_desc); + finished = true; + + VERBOSE(Execution) << "Execution finished" << std::endl; +} + +void Execution::startExecute() +{ + VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl; + + _exec_thread = nnfw::cpp14::make_unique<std::thread>(&Execution::execute, this); +} + +void Execution::waitFinish() +{ + VERBOSE(Execution) << "Wait to finish execution" << std::endl; + + _exec_thread->join(); + finished = true; +} + +bool Execution::isFinished(void) const { return finished; } + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ExecutionObservee.cc b/runtime/neurun/core/src/exec/ExecutionObservee.cc new file mode 100644 index 000000000..3b342d703 --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutionObservee.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ExecutionObservee.h" + +namespace neurun +{ +namespace exec +{ + +void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer) +{ + _observers.emplace_back(std::move(observer)); +} + +void ExecutionObservee::notifyModelBegin(IExecutor *executor) +{ + for (auto &o : _observers) + { + o->handleBegin(executor); + } +} + +void ExecutionObservee::notifyModelEnd(IExecutor *executor) +{ + for (auto &o : _observers) + { + o->handleEnd(executor); + } +} + +void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + for (auto &o : _observers) + { + o->handleBegin(executor, op_seq, backend); + } +} + +void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + for (auto &o : _observers) + { + o->handleEnd(executor, op_seq, backend); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ExecutionObservee.h b/runtime/neurun/core/src/exec/ExecutionObservee.h new file mode 100644 index 000000000..dafeef55b --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutionObservee.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_EXECUTION_OBSERVEE_H__ +#define __NEURUN_EXEC_EXECUTION_OBSERVEE_H__ + +#include <list> + +#include "exec/ExecutionObservers.h" + +namespace neurun +{ +namespace exec +{ + +/** + * @brief Class that + * + */ +class ExecutionObservee +{ +public: + /** + * @brief Register an observer + * + * @param observer Observer to be added + */ + void add(std::unique_ptr<IExecutionObserver> observer); + void notifyModelBegin(IExecutor *executor); + void notifyModelEnd(IExecutor *executor); + void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, + const backend::Backend *backend); + void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, + const backend::Backend *backend); + +private: + std::list<std::unique_ptr<IExecutionObserver>> _observers; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_EXECUTION_OBSERVEE__ diff --git a/runtime/neurun/core/src/exec/ExecutionObservers.cc b/runtime/neurun/core/src/exec/ExecutionObservers.cc new file mode 100644 index 000000000..071a9e228 --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutionObservers.cc @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/ExecutionObservers.h" + +#include <string> + +#include "util/logging.h" +#include "ir/operation/Permute.h" +#include "exec/IExecutor.h" +#include "misc/polymorphic_downcast.h" +#include "ir/OpSequence.h" + +namespace neurun +{ + +namespace exec +{ + +void ProfileObserver::handleBegin(neurun::exec::IExecutor *, const ir::OpSequence *, + const neurun::backend::Backend *backend) +{ + _timer = backend->config()->timer(); + if (_timer == nullptr) + throw std::runtime_error("To profile backend timer() method must be implemented"); + _timer->handleBegin(); +} + +void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + _timer->handleEnd(); + const auto timer_res = _timer->getTime(); + + // NOTE This assumes there is just one operation in a op_seq + auto node = op_seq->operations().at(0).node; + auto node_name = node->name(); + VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl; + + // fill ExecTime: + bool is_quantized = exec->graph().operands().at(node->getInputs().at(0)).typeInfo().type() == + ir::DataType::QUANT8_ASYMM; + + uint32_t size = 0; + for (const auto &input : node->getInputs()) + { + size += exec->graph().operands().at(input).info().total_size(); + } + for (const auto &output : node->getOutputs()) + { + size += exec->graph().operands().at(output).info().total_size(); + } + if (node_name == "Permute") + { + auto *permute_node = nnfw::misc::polymorphic_downcast<const ir::operation::Permute *>(node); + assert(permute_node != 
nullptr); + _et->updatePermuteTime(permute_node->param().input_backend_ctx->backend, + permute_node->param().output_backend_ctx->backend, is_quantized, size, + timer_res); + } + else + { + _et->updateOperationExecTime(backend, node_name, is_quantized, size, timer_res); + } +}; + +ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath) + : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder} +{ +} + +ChromeTracingObserver::~ChromeTracingObserver() { _recorder.writeToFile(_ofs); } + +void ChromeTracingObserver::handleBegin(IExecutor *) +{ + _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"}); +} + +void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + std::string backend_id = backend->config()->id(); + _collector.onEvent( + EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, subgraphTag(op_seq)}); +} + +void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + std::string backend_id = backend->config()->id(); + _collector.onEvent( + EventCollector::Event{EventCollector::Edge::END, backend_id, subgraphTag(op_seq)}); +} + +void ChromeTracingObserver::handleEnd(IExecutor *) +{ + _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"}); +} + +std::string ChromeTracingObserver::subgraphTag(const ir::OpSequence *op_seq) +{ + if (op_seq->size() == 0) + return "Empty OpSequence"; + + auto first_op = op_seq->operations().at(0); + std::string tag = "$" + std::to_string(first_op.index.value()); + tag += " " + first_op.node->name(); + if (op_seq->size() > 1) + { + tag += " (+" + std::to_string(op_seq->size() - 1) + ")"; + } + return tag; +} + +} // namespace exec + +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ExecutorBase.cc b/runtime/neurun/core/src/exec/ExecutorBase.cc new file mode 100644 index 
000000000..9692c2ba7 --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutorBase.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ExecutorBase.h" +#include "util/logging.h" +namespace neurun +{ +namespace exec +{ + +ExecutorBase::ExecutorBase(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs) + : _graph{graph}, _operand_context{operand_context}, _tensor_mgrs{std::move(tensor_mgrs)}, + _mutex() +{ + // DO NOTHING +} + +std::unique_ptr<ISource> ExecutorBase::source(const ir::IOIndex &index, const ir::TypeInfo &type, + const void *buffer, size_t length, + ir::Layout io_layout) +{ + using ir::DataType; + switch (type.type()) + { + case DataType::FLOAT32: + return source<float>(index, buffer, length, io_layout); + case DataType::INT32: + return source<int32_t>(index, buffer, length, io_layout); + case DataType::UINT32: + return source<uint32_t>(index, buffer, length, io_layout); + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + case DataType::UINT8: + return source<uint8_t>(index, buffer, length, io_layout); + case DataType::QUANT8_SYMM: + return source<int8_t>(index, buffer, length, io_layout); + default: + throw std::runtime_error("Not supported yet"); + } +} + +std::unique_ptr<ISink> ExecutorBase::sink(const ir::IOIndex &index, const 
ir::TypeInfo &type, + void *buffer, size_t length, ir::Layout io_layout) +{ + using ir::DataType; + switch (type.type()) + { + case DataType::FLOAT32: + return sink<float>(index, buffer, length, io_layout); + case DataType::INT32: + return sink<int32_t>(index, buffer, length, io_layout); + case DataType::UINT32: + return sink<uint32_t>(index, buffer, length, io_layout); + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + case DataType::UINT8: + return sink<uint8_t>(index, buffer, length, io_layout); + case DataType::QUANT8_SYMM: + return sink<int8_t>(index, buffer, length, io_layout); + default: + throw std::runtime_error("Not supported yet"); + } +} + +void ExecutorBase::execute(const IODescription &desc) +{ + // For thread-safe, use mutex + // TODO: if all used backends on this executor are thread-safe, + // do not need to use mutex (otherwise, use mutex) + std::lock_guard<std::mutex> lock(_mutex); + + std::vector<std::unique_ptr<ISource>> sources{_graph.getInputs().size()}; + std::vector<std::unique_ptr<ISink>> sinks{_graph.getOutputs().size()}; + + // Set input(s) + for (uint32_t n = 0; n < _graph.getInputs().size(); ++n) + { + ir::IOIndex input_index{n}; + ir::OperandIndex index{_graph.getInputs().at(input_index)}; + + if (desc.inputs.at(n) == nullptr) + { + // Optional input + continue; + } + + const auto operand_li = _graph.getLowerInfo()->operand.at(index).get(); + if (operand_li->def_factors().empty()) + { + // This input is not used (i.e. constant, EX. 
reshape's axis) + continue; + } + + const auto &input = *desc.inputs.at(n); + sources.at(n) = + source(input_index, input.info.typeInfo(), input.buffer, input.size, input.layout); + + auto setter = [&](::neurun::backend::operand::ITensor &tensor) { sources.at(n)->push(tensor); }; + + auto object = _operand_context->at(index); + + object->access(setter); + } + + executeImpl(); + + // Get output(s) + for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n) + { + ir::IOIndex output_index{n}; + // Optional output + if (desc.outputs.at(n) == nullptr) + { + continue; + } + const auto &output = *desc.outputs.at(n); + sinks.at(n) = + sink(output_index, output.info.typeInfo(), output.buffer, output.size, output.layout); + + auto getter = [&](::neurun::backend::operand::ITensor &tensor) { sinks.at(n)->pull(tensor); }; + + ir::OperandIndex index{_graph.getOutputs().at(output_index)}; + auto object = _operand_context->at(index); + + object->access(getter); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ExecutorBase.h b/runtime/neurun/core/src/exec/ExecutorBase.h new file mode 100644 index 000000000..a93e036a5 --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutorBase.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_EXECUTOR_BASE_H__ +#define __NEURUN_EXEC_EXECUTOR_BASE_H__ + +#include <mutex> + +#include "Source.h" +#include "exec/ExecutionObservers.h" +#include "Sink.h" +#include "exec/IExecutor.h" +#include "ir/Graph.h" +#include "ir/LowerInfoMap.h" +#include "backend/IConfig.h" +#include "backend/Backend.h" +#include "compiler/OperandContext.h" +#include "backend/ExecTime.h" +#include "exec/IFunction.h" +#include "backend/ITensorManager.h" +#include "exec/ExecutionObservee.h" +#include <list> + +namespace neurun +{ +namespace exec +{ + +class ExecutorBase : public IExecutor +{ +public: + ExecutorBase(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs); + + virtual ~ExecutorBase() = default; + + const ir::Graph &graph() final { return _graph; } + + void execute(const IODescription &desc) final; + + // Used only in Dataflow and Parallel Executors + void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final + { + _indexed_ranks = std::move(ranks); + }; + + virtual void executeImpl(void) = 0; + + void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); }; + +private: + std::unique_ptr<ISource> source(const ir::IOIndex &index, const ir::TypeInfo &type, + const void *buffer, size_t length, ir::Layout io_layout); + std::unique_ptr<ISink> sink(const ir::IOIndex &index, const ir::TypeInfo &type, void *buffer, + size_t length, ir::Layout io_layout); + + template <typename T> + std::unique_ptr<ISource> source(const ir::IOIndex &index, const void *buffer, size_t length, + ir::Layout io_layout) + { + const auto operand_index = _graph.getInputs().at(index); + const auto &operand = _graph.operands().at(operand_index); + + const auto tensor = _operand_context->at(operand_index); + const auto tensor_layout = tensor->layout(); + + if (((io_layout == ir::Layout::NHWC) && (tensor_layout == ir::Layout::NCHW)) || 
+ ((io_layout == ir::Layout::NCHW) && (tensor_layout == ir::Layout::NHWC))) + { + return nnfw::cpp14::make_unique<PermutateSource<T>>(buffer, length, operand.shape(), + io_layout); + } + // TODO Change this to return error + assert(io_layout != ir::Layout::UNKNOWN || + (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NCHW)); + + return nnfw::cpp14::make_unique<CopySource<T>>(buffer, length, operand.shape()); + } + + template <typename T> + std::unique_ptr<ISink> sink(const ir::IOIndex &index, void *buffer, size_t length, + ir::Layout io_layout) + { + const auto operand_index = _graph.getOutputs().at(index); + const auto &operand = _graph.operands().at(operand_index); + const auto tensor = _operand_context->at(operand_index); + const auto tensor_layout = tensor->layout(); + + if (((tensor_layout == ir::Layout::NCHW) && (io_layout == ir::Layout::NHWC)) || + ((tensor_layout == ir::Layout::NHWC) && (io_layout == ir::Layout::NCHW))) + { + return nnfw::cpp14::make_unique<PermutateSink<T>>(buffer, length, operand.shape(), io_layout); + } + // TODO Change this to return error + assert(io_layout != ir::Layout::UNKNOWN || + (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NCHW)); + + return nnfw::cpp14::make_unique<CopySink<T>>(buffer, length, operand.shape()); + } + +protected: + ExecutionObservee _subject; + std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; + const ir::Graph &_graph; + std::shared_ptr<compiler::OperandContext> _operand_context; + std::unique_ptr<backend::TensorManagerSet> _tensor_mgrs; + std::mutex _mutex; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_EXECUTOR_BASE_H__ diff --git a/runtime/neurun/core/src/exec/FunctionSequence.cc b/runtime/neurun/core/src/exec/FunctionSequence.cc new file mode 100644 index 000000000..00214fcfa --- /dev/null +++ b/runtime/neurun/core/src/exec/FunctionSequence.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "FunctionSequence.h" + +namespace neurun +{ +namespace exec +{ + +void FunctionSequence::run() +{ + for (const auto &function : _functions) + { + function->run(); + } +} + +void FunctionSequence::runSync() +{ + for (const auto &function : _functions) + { + function->runSync(); + } +} + +void FunctionSequence::prepare() +{ + for (const auto &function : _functions) + { + function->prepare(); + } +} + +void FunctionSequence::append(std::unique_ptr<IFunction> &&function) +{ + _functions.push_back(std::move(function)); +} + +void FunctionSequence::iterate(const std::function<void(IFunction &)> &fn) +{ + for (const auto &func : _functions) + { + fn(*func); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/FunctionSequence.h b/runtime/neurun/core/src/exec/FunctionSequence.h new file mode 100644 index 000000000..2ba5c0b08 --- /dev/null +++ b/runtime/neurun/core/src/exec/FunctionSequence.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ +#define __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ + +#include <memory> +#include <vector> +#include <functional> + +#include "exec/IFunction.h" + +namespace neurun +{ +namespace exec +{ + +class FunctionSequence : public IFunction +{ +public: + virtual ~FunctionSequence() = default; + + void run() override; + void runSync() override; + void prepare() override; + + /** + * @brief Appends an IFunction object to the function sequence + * + * @param function IFunction object to be appended + */ + void append(std::unique_ptr<IFunction> &&function); + + void iterate(const std::function<void(IFunction &)> &fn); + +private: + std::vector<std::unique_ptr<IFunction>> _functions; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ diff --git a/runtime/neurun/core/src/exec/Job.cc b/runtime/neurun/core/src/exec/Job.cc new file mode 100644 index 000000000..ba02daf30 --- /dev/null +++ b/runtime/neurun/core/src/exec/Job.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Job.h" + +#include <cassert> + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +Job::Job(uint32_t index, IFunction *fn) : _index{index}, _fn{fn} {} + +void Job::run() { _fn->run(); } + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/Job.h b/runtime/neurun/core/src/exec/Job.h new file mode 100644 index 000000000..1516b9281 --- /dev/null +++ b/runtime/neurun/core/src/exec/Job.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_JOB_H__ +#define __NEURUN_EXEC_JOB_H__ + +#include <unordered_set> + +#include "exec/IFunction.h" +#include "ir/Index.h" +#include "ir/OperandIndexSequence.h" +#include "backend/Backend.h" + +namespace neurun +{ +namespace exec +{ + +class Job +{ +public: + /** + * @brief Constructs a Job object + * + * @param index Operation index for this job + * @param fn compiled code to run this job + * @param inputs Input operand list + * @param outputs Output operand list + */ + Job(uint32_t index, IFunction *fn); + /** + * @brief Execute the compiled code + */ + void run(); + /** + * @brief Return job index + * + * @return Job index + */ + uint32_t index() const { return _index; } + /** + * @brief Return the function to be executed + * + * @return Pointer of the function + */ + IFunction *fn() { return _fn; } + +private: + uint32_t _index; + IFunction *_fn; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_JOB_H__ diff --git a/runtime/neurun/core/src/exec/LinearExecutor.cc b/runtime/neurun/core/src/exec/LinearExecutor.cc new file mode 100644 index 000000000..d41dba880 --- /dev/null +++ b/runtime/neurun/core/src/exec/LinearExecutor.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "LinearExecutor.h" + +namespace neurun +{ +namespace exec +{ + +void LinearExecutor::executeImpl() +{ + _subject.notifyModelBegin(this); + for (auto &&code : _code) + { + const auto op_seq = code.elem.op_seq; + const auto backend = code.elem.lower_info->backend(); + _subject.notifyJobBegin(this, op_seq, backend); + code.fn->run(); + _subject.notifyJobEnd(this, op_seq, backend); + } + _subject.notifyModelEnd(this); +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/LinearExecutor.h b/runtime/neurun/core/src/exec/LinearExecutor.h new file mode 100644 index 000000000..baf063a12 --- /dev/null +++ b/runtime/neurun/core/src/exec/LinearExecutor.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file LinearExecutor.h + * @brief This file contains LinearExecutor class to define and run execution phase + */ + +#ifndef __NEURUN_EXEC_EXECUTOR_H_ +#define __NEURUN_EXEC_EXECUTOR_H_ + +#include "ExecutorBase.h" +#include "compiler/Linear.h" +#include "exec/FunctionSequence.h" +#include "compiler/CodeWithInfo.h" + +namespace neurun +{ +namespace exec +{ + +/** + * @brief Class to handle execution phase. 
Simple run the sequence of operations that is sorted in + * topological order + */ +class LinearExecutor final : public ExecutorBase +{ +public: + /** + * @brief Construct a new LinearExecutor object + * @param[in] plan Execution plan generated by compiled result + */ + LinearExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, + std::vector<compiler::CodeWithInfo> &&code) + : ExecutorBase{graph, operand_context, std::move(tensor_mgrs)}, _code{std::move(code)} + { + } + +public: + void executeImpl(void) override; + +private: + std::vector<compiler::CodeWithInfo> _code; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_EXECUTOR_H_ diff --git a/runtime/neurun/core/src/exec/ParallelExecutor.cc b/runtime/neurun/core/src/exec/ParallelExecutor.cc new file mode 100644 index 000000000..c73c353d3 --- /dev/null +++ b/runtime/neurun/core/src/exec/ParallelExecutor.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ParallelExecutor.h" + +#include <cassert> + +#include "util/logging.h" +#include "exec/IFunction.h" + +namespace neurun +{ +namespace exec +{ + +class HookFunction : public IFunction +{ +public: + HookFunction(IFunction *fn, const std::function<void()> &setup, + const std::function<void()> &teardown) + : _fn{fn}, _setup{setup}, _teardown{teardown} + { + } + +public: + void run() override + { + _setup(); + _fn->run(); + _teardown(); + } + void runSync() override { throw("runSync is needed just for profiling in Dataflow executor"); } + +private: + IFunction *_fn; + std::function<void()> _setup; + std::function<void()> _teardown; +}; + +void ParallelExecutor::notify(uint32_t finished_job_id) +{ + std::unique_lock<std::mutex> lock{_mu_jobs}; + + DataflowExecutor::notify(finished_job_id); + + lock.unlock(); + _cv_jobs.notify_all(); +} + +ParallelExecutor::ParallelExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, + CodeMap &&code_map) + : DataflowExecutor{graph, operand_context, std::move(tensor_mgrs), std::move(code_map)} +{ + VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; +} + +void ParallelExecutor::executeImpl() +{ + // Init scheduler + // TODO Consider to have distinct backend set in LowerInfoMap + ir::BackendSet backends; + for (auto &itr : _graph.getLowerInfo()->operation) + { + backends.add(itr.second->backend()); + } + _scheduler = nnfw::cpp14::make_unique<ParallelScheduler>(backends); + + assert(noWaitingJobs()); + + // Execution setup + _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs + + for (uint32_t i = 0; i < _waiting_jobs.size(); ++i) + { + VERBOSE(ParallelExecutor) << i << ": " << _input_info[i] << std::endl; + if (_input_info[i] == 0) + { + emplaceToReadyJobs(i); + } + } + assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs + + VERBOSE(ParallelExecutor) 
<< "INITIAL JOBS : " << _ready_jobs.size() << std::endl; + + _subject.notifyModelBegin(this); + while (true) + { + std::unique_lock<std::mutex> lock{_mu_jobs}; + + if (_ready_jobs.empty()) + { + _cv_jobs.wait(lock, [this] { return !_ready_jobs.empty() || noWaitingJobs(); }); + // Check finish condition + if (_ready_jobs.empty() && noWaitingJobs()) + { + break; + } + } + + auto job = std::move(_ready_jobs.begin()->second); + _ready_jobs.erase(_ready_jobs.begin()); + + lock.unlock(); + + VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl; + + auto job_index = job->index(); + auto subgraph_index = _job_to_op_seq[job_index]; + auto op_seq = &_graph.subgraphs()->at(subgraph_index); + auto backend = _graph.getLowerInfo()->operation.at(subgraph_index)->backend(); + auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); }; + auto teardown = [&, job_index, op_seq, backend]() { + _subject.notifyJobEnd(this, op_seq, backend); + notify(job_index); + }; + + _scheduler->assign(nnfw::cpp14::make_unique<HookFunction>(job->fn(), setup, teardown), backend); + _finished_jobs[job_index] = std::move(job); + } + + assert(noWaitingJobs()); + + // Wait for all the jobs done + _scheduler->finish(); + _subject.notifyModelEnd(this); + + // Reset input info for the next execution + _input_info = _initial_input_info; +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ParallelExecutor.h b/runtime/neurun/core/src/exec/ParallelExecutor.h new file mode 100644 index 000000000..54377fd9e --- /dev/null +++ b/runtime/neurun/core/src/exec/ParallelExecutor.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_PARALLEL_EXECUTOR_H__ +#define __NEURUN_EXEC_PARALLEL_EXECUTOR_H__ + +#include <list> +#include <queue> +#include <unordered_map> + +#include "FunctionSequence.h" +#include "Job.h" +#include "ir/OperandIndexSequence.h" +#include "ir/Index.h" +#include "cpp14/memory.h" +#include "exec/DataflowExecutor.h" +#include "ParallelScheduler.h" + +namespace neurun +{ +namespace exec +{ + +/** + * @brief Class to execute Graph in parallel + */ +class ParallelExecutor : public DataflowExecutor +{ +protected: + void notify(uint32_t finished_job_id) override; + +public: + /** + * @brief Constructs a ParallelExecutor object + * + * @param graph Graph object + * @param operand_context (Only for input/output operand data access) + * @param code_map Compiled code map + * @param ranks Operation ranks for ordering execution + */ + ParallelExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map); + + void executeImpl() override; + +private: + std::condition_variable _cv_jobs; + std::mutex _mu_jobs; + std::unique_ptr<ParallelScheduler> _scheduler; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_PARALLEL_EXECUTOR_H__ diff --git a/runtime/neurun/core/src/exec/ParallelScheduler.cc b/runtime/neurun/core/src/exec/ParallelScheduler.cc new file mode 100644 index 000000000..5f9e9e013 --- /dev/null +++ b/runtime/neurun/core/src/exec/ParallelScheduler.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ParallelScheduler.h" + +#include <cassert> + +#include "cpp14/memory.h" +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +ParallelScheduler::ParallelScheduler(const ir::BackendSet &backends) +{ + assert(!backends.empty()); + + for (auto backend : backends) + { + _thread_pools[backend] = nnfw::cpp14::make_unique<ThreadPool>(); + } +} + +void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend) +{ + assert(!_thread_pools.empty()); + + _thread_pools.at(backend)->enqueue(std::move(fn)); +} + +void ParallelScheduler::finish() +{ + for (auto &itr : _thread_pools) + { + itr.second->finish(); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ParallelScheduler.h b/runtime/neurun/core/src/exec/ParallelScheduler.h new file mode 100644 index 000000000..af1103750 --- /dev/null +++ b/runtime/neurun/core/src/exec/ParallelScheduler.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_PARALLEL_SCHEDULER_H__ +#define __NEURUN_EXEC_PARALLEL_SCHEDULER_H__ + +#include <unordered_map> +#include <memory> + +#include "exec/IFunction.h" +#include "ir/BackendSet.h" +#include "ThreadPool.h" + +namespace neurun +{ +namespace exec +{ + +class ParallelScheduler +{ +public: + /** + * @brief Constructs ParallelScheduler object + * + * @param backends Backend set + */ + ParallelScheduler(const ir::BackendSet &backends); + /** + * @brief Assign a task to the given backend + * + * @param[in] fn Function to be assigned + * @param[in] fn Target backend + */ + void assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend); + /** + * @brief Block until all jobs are finished + */ + void finish(); + +private: + std::unordered_map<const backend::Backend *, std::unique_ptr<ThreadPool>> _thread_pools; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_PARALLEL_SCHEDULER_H__ diff --git a/runtime/neurun/core/src/exec/Sink.h b/runtime/neurun/core/src/exec/Sink.h new file mode 100644 index 000000000..bb2a6c58a --- /dev/null +++ b/runtime/neurun/core/src/exec/Sink.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
#ifndef __NEURUN_EXEC_SINK_H__
#define __NEURUN_EXEC_SINK_H__

#include <cassert>

#include "cpp14/memory.h"
#include "util/feature/nchw/Reader.h"
#include "util/feature/nchw/View.h"
#include "util/feature/nhwc/Reader.h"
#include "util/feature/nhwc/View.h"
#include "util/Utils.h"
#include <misc/feature/IndexIterator.h>

namespace neurun
{
namespace exec
{
/**
 * @brief Interface for copying a backend tensor out into a user-supplied output buffer
 */
struct ISink
{
  virtual ~ISink() = default;

  /**
   * @brief Copy (and, depending on the concrete sink, layout-permute) @p tensor into the
   *        buffer held by this sink
   */
  virtual void pull(::neurun::backend::operand::ITensor &tensor) const = 0;
};

// Create second level inheritance: the first level is used as a reference type in use-case places
template <typename T> class ITemplSink : public ISink
{
public:
  /**
   * @brief Construct a sink over an output buffer
   *
   * @param output_buffer Destination buffer (interpreted as T elements)
   * @param output_size   Destination size in bytes
   * @param shape         Logical shape of the operand being pulled
   * @param copy          true: plain copy; false: NCHW<->NHWC permutation while pulling
   * @param io_layout     Layout of the user-side (I/O) buffer
   */
  ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
             const bool copy, ir::Layout io_layout)
      : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
        _shape{shape}, _copy{copy}, _io_layout{io_layout}
  {
  }

protected:
  // Copies the tensor into _output_buffer, bulk or element-wise depending on rank/padding,
  // permuting NCHW<->NHWC for rank-4 tensors when _copy is false.
  void pullUnif(neurun::backend::operand::ITensor &tensor) const
  {
    // Either the two layouts differ (permutation requested) or this is a plain copy.
    assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
            (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
           _copy);
    auto input_buffer = tensor.buffer();
    auto rank = _shape.rank();

    // Fast path: unpadded tensor. `_copy` (bool) promotes to 0/1, so a permuting sink may
    // bulk-copy only up to rank 3 (the NCHW/NHWC permutation is meaningful for rank 4 only).
    if (!tensor.has_padding() && rank < 4 + _copy)
    {
      memcpy(_output_buffer, input_buffer, _output_size);
      return;
    }

    switch (rank)
    {
      case 0:
      case 1:
      {
        memcpy(_output_buffer, input_buffer, _output_size);
        break;
      }
      case 2:
      {
        // Row-by-row copy honoring the tensor's (possibly padded) strides.
        // NOTE(review): assumes tensor.buffer() is byte-addressed and calcOffset() returns a
        // byte offset — confirm against the ITensor contract.
        const int32_t copy_len = _shape.dim(1);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          neurun::util::Coordinates coords{i, 0};
          memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
                 copy_len * sizeof(T));
        }
        break;
      }
      case 3:
      {
        const int32_t dim1 = _shape.dim(1);
        const int32_t dim2 = _shape.dim(2);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          for (auto j = 0; j < _shape.dim(1); ++j)
          {
            neurun::util::Coordinates coords{i, j, 0};
            memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
                   input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
          }
        }
        break;
      }
      case 4:
      {
        if (_copy)
        {
          // Same layout on both sides: copy innermost runs one by one.
          const int32_t dim1 = _shape.dim(1);
          const int32_t dim2 = _shape.dim(2);
          const int32_t dim3 = _shape.dim(3);

          for (auto i = 0; i < _shape.dim(0); ++i)
          {
            for (auto j = 0; j < _shape.dim(1); ++j)
            {
              for (auto k = 0; k < _shape.dim(2); ++k)
              {
                neurun::util::Coordinates coords{i, j, k, 0};
                memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
                       input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
              }
            }
          }
        }
        else
        {
          // Layouts differ: permute element-wise through the feature readers/views.
          const auto shape = _shape.asFeature(_io_layout);

          if (_io_layout == ir::Layout::NHWC)
          {
            // Tensor is NCHW (per the assert above), user buffer is NHWC.
            const util::feature::nchw::Reader<T> from(&tensor);
            util::feature::nhwc::View<T> into(shape, _output_buffer, _output_size);
            ::nnfw::misc::feature::iterate(shape)
                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                     const auto value = from.at(batch, ch, row, col);
                     into.at(batch, row, col, ch) = value;
                   };
          }
          else if (_io_layout == ir::Layout::NCHW)
          {
            // Tensor is NHWC, user buffer is NCHW.
            const util::feature::nhwc::Reader<T> from(&tensor);
            util::feature::nchw::View<T> into(shape, _output_buffer, _output_size);
            ::nnfw::misc::feature::iterate(shape)
                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                     const auto value = from.at(batch, row, col, ch);
                     into.at(batch, ch, row, col) = value;
                   };
          }
          else
          {
            throw std::runtime_error("Wrong Layout");
          }
        }
        break;
      }
      default:
        throw std::runtime_error("NYI");
        break;
    }
  }

private:
  T *_output_buffer;           // destination (user) buffer
  const size_t _output_size;   // destination size in bytes
  const ir::Shape _shape;      // logical shape of the operand
  const bool _copy;            // true: plain copy, false: layout permutation
  const ir::Layout _io_layout; // layout of the user-side buffer
};

/**
 * @brief Sink that permutes NCHW<->NHWC while pulling (rank-4 feature tensors)
 */
template <typename T> class PermutateSink final : public ITemplSink<T>
{
public:
  PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
                ir::Layout io_layout)
      : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout)
  {
  }

public:
  void pull(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSink<T>::pullUnif(tensor);
  }
};

// Only supports NHWC format front-end(NNAPI) now
template <typename T> class CopySink final : public ITemplSink<T>
{
public:
  CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
           ir::Layout io_layout = ir::Layout::UNKNOWN)
      : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout)
  {
  }

public:
  void pull(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSink<T>::pullUnif(tensor);
  }
};

} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_SINK_H__
#ifndef __NEURUN_EXEC_SOURCE_H__
#define __NEURUN_EXEC_SOURCE_H__

#include <cassert>

#include "cpp14/memory.h"
#include "util/feature/nchw/Reader.h"
#include "util/feature/nchw/View.h"
#include "util/feature/nhwc/Reader.h"
#include "util/feature/nhwc/View.h"
#include "util/Utils.h"
#include <misc/feature/IndexIterator.h>
#include <ir/Layout.h>
#include "ir/Shape.h"

namespace neurun
{
namespace exec
{

/**
 * @brief Interface for copying a user-supplied input buffer into a backend tensor
 */
struct ISource
{
  virtual ~ISource() = default;

  /**
   * @brief Copy (and, depending on the concrete source, layout-permute) the held buffer
   *        into @p tensor
   */
  virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;
};

// Create second level inheritance: the first level is used as a reference type in use-case places
template <typename T> class ITemplSource : public ISource
{
public:
  /**
   * @brief Construct a source over an input buffer
   *
   * @param input_buffer Source buffer (interpreted as T elements)
   * @param input_size   Source size in bytes
   * @param shape        Logical shape of the operand being pushed
   * @param copy         true: plain copy; false: NCHW<->NHWC permutation while pushing
   * @param io_layout    Layout of the user-side (I/O) buffer
   */
  ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
               const bool copy, ir::Layout io_layout)
      : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
        _shape{shape}, _copy(copy), _io_layout{io_layout}
  {
  }

  virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;

protected:
  // Mirror of ITemplSink::pullUnif: copies _input_buffer into the tensor, bulk or element-wise
  // depending on rank/padding, permuting NCHW<->NHWC for rank-4 tensors when _copy is false.
  void pushUnif(neurun::backend::operand::ITensor &tensor) const
  {
    // Either the two layouts differ (permutation requested) or this is a plain copy.
    assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
            (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
           _copy);
    auto output_buffer = tensor.buffer();
    auto rank = _shape.rank();

    // Fast path for unpadded tensors; bool _copy promotes to 0/1 (see the sink counterpart).
    if (!tensor.has_padding() && rank < 4 + _copy)
    {
      memcpy(output_buffer, _input_buffer, _input_size);
      return;
    }

    switch (rank)
    {
      case 0:
      case 1:
      {
        memcpy(output_buffer, _input_buffer, _input_size);
        break;
      }
      case 2:
      {
        // Row-by-row copy honoring the tensor's (possibly padded) strides.
        // NOTE(review): assumes tensor.buffer() is byte-addressed and calcOffset() returns a
        // byte offset — confirm against the ITensor contract.
        const int32_t copy_len = _shape.dim(1);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          neurun::util::Coordinates coords{i, 0};
          memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
                 copy_len * sizeof(T));
        }
        break;
      }
      case 3:
      {
        const int32_t dim1 = _shape.dim(1);
        const int32_t dim2 = _shape.dim(2);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          for (auto j = 0; j < _shape.dim(1); ++j)
          {
            neurun::util::Coordinates coords{i, j, 0};
            memcpy(output_buffer + tensor.calcOffset(coords),
                   _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
          }
        }
        break;
      }
      case 4:
      {
        if (_copy)
        {
          // Same layout on both sides: copy innermost runs one by one.
          const int32_t dim1 = _shape.dim(1);
          const int32_t dim2 = _shape.dim(2);
          const int32_t dim3 = _shape.dim(3);
          for (auto i = 0; i < _shape.dim(0); ++i)
          {
            for (auto j = 0; j < _shape.dim(1); ++j)
            {
              for (auto k = 0; k < _shape.dim(2); ++k)
              {
                neurun::util::Coordinates coords{i, j, k, 0};
                memcpy(output_buffer + tensor.calcOffset(coords),
                       _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
                       dim3 * sizeof(T));
              }
            }
          }
        }
        else
        {
          // Layouts differ: permute element-wise through the feature readers/views.
          const auto shape = _shape.asFeature(_io_layout);

          if (_io_layout == ir::Layout::NCHW)
          {
            // User buffer is NCHW, tensor is NHWC (per the assert above).
            const util::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size);
            util::feature::nhwc::View<T> into(&tensor);
            ::nnfw::misc::feature::iterate(shape)
                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                     const auto value = from.at(batch, ch, row, col);
                     into.at(batch, row, col, ch) = value;
                   };
          }
          else if (_io_layout == ir::Layout::NHWC)
          {
            // User buffer is NHWC, tensor is NCHW.
            const util::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size);
            util::feature::nchw::View<T> into(&tensor);
            ::nnfw::misc::feature::iterate(shape)
                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                     const auto value = from.at(batch, row, col, ch);
                     into.at(batch, ch, row, col) = value;
                   };
          }
          else
          {
            throw std::runtime_error("Wrong Layout");
          }
        }

        break;
      }
      default:
        throw std::runtime_error("NYI");
        break;
    }
  }

private:
  const T *_input_buffer;      // source (user) buffer
  const size_t _input_size;    // source size in bytes
  const ir::Shape _shape;      // logical shape of the operand
  const bool _copy;            // true: plain copy, false: layout permutation
  const ir::Layout _io_layout; // layout of the user-side buffer
};

/**
 * @brief Source that permutes NHWC<->NCHW while pushing (rank-4 feature tensors)
 */
template <typename T> class PermutateSource final : public ITemplSource<T>
{
public:
  PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
                  ir::Layout io_layout)
      : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout)
  {
  }

public:
  void push(neurun::backend::operand::ITensor &tensor) const override
  {
    // do NHWC_TO_NCHW or NCHW_TO_NHWC permutation
    ITemplSource<T>::pushUnif(tensor);
  }
};

/**
 * @brief Source that copies without layout change
 */
template <typename T> class CopySource final : public ITemplSource<T>
{
public:
  CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
             ir::Layout io_layout = ir::Layout::UNKNOWN)
      : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout)
  {
  }

public:
  void push(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSource<T>::pushUnif(tensor);
  }
};

} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_SOURCE_H__
+ */ + +#include "ThreadPool.h" + +#include <cassert> + +namespace neurun +{ +namespace exec +{ + +ThreadPool::ThreadPool(uint32_t num_threads) +{ + assert(num_threads >= 1); + + for (uint32_t i = 0; i < num_threads; i++) + { + _threads.emplace_back(std::ref(_worker)); + } +} + +ThreadPool::~ThreadPool() +{ + if (!_threads.empty()) + { + _worker.terminate(); + join(); + } +} + +void ThreadPool::enqueue(std::unique_ptr<IFunction> &&fn) { _worker.enqueue(std::move(fn)); } + +uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); } + +void ThreadPool::join() +{ + for (auto &thread : _threads) + { + thread.join(); + } + _threads.clear(); +} + +void ThreadPool::finish() +{ + _worker.finish(); + join(); +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ThreadPool.h b/runtime/neurun/core/src/exec/ThreadPool.h new file mode 100644 index 000000000..a1a027617 --- /dev/null +++ b/runtime/neurun/core/src/exec/ThreadPool.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_THREAD_POOL_H__ +#define __NEURUN_EXEC_THREAD_POOL_H__ + +#include <thread> +#include <memory> +#include <vector> + +#include "WorkQueue.h" + +namespace neurun +{ +namespace exec +{ + +class ThreadPool +{ +public: + /** + * @brief Coustruct ThreadPool object + * + * @param num_threads Number of threads + */ + ThreadPool(uint32_t num_threads = 1); + /** + * @brief Destroy ThreadPool object + */ + ~ThreadPool(); + /** + * @brief Enqueue a function + * + * @param fn A function to be queued + */ + void enqueue(std::unique_ptr<IFunction> &&fn); + /** + * @brief Get number of jobs in worker's queue + * + * @return Number of jobs + */ + uint32_t numJobsInQueue(); + + /** + * @brief Block until all jobs are finished + */ + void finish(); + +private: + void join(); + +private: + WorkQueue _worker; + std::vector<std::thread> _threads; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_THREAD_POOL_H__ diff --git a/runtime/neurun/core/src/exec/WorkQueue.cc b/runtime/neurun/core/src/exec/WorkQueue.cc new file mode 100644 index 000000000..6712554ac --- /dev/null +++ b/runtime/neurun/core/src/exec/WorkQueue.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "WorkQueue.h" + +#include <cassert> + +namespace neurun +{ +namespace exec +{ + +WorkQueue::~WorkQueue() +{ + { + std::unique_lock<std::mutex> lock(_mu); + _state = State::FORCE_FINISHING; + } + _cv.notify_all(); +} + +void WorkQueue::operator()() +{ + while (true) + { + std::unique_ptr<IFunction> fn = nullptr; + + { + std::unique_lock<std::mutex> lock{_mu}; + _cv.wait(lock, [this] { + return (_state == State::FORCE_FINISHING) || (_state == State::FINISHING) || + (_state == State::ONLINE && !_functions.empty()); + }); + + if (_state == State::FORCE_FINISHING) + { + assert(_functions.empty() && "Terminating with unfinished jobs"); + return; + } + else if (_state == State::FINISHING && _functions.empty()) + { + return; + } + else + { + assert(((_state == State::FINISHING) || (_state == State::ONLINE)) && !_functions.empty()); + fn = std::move(_functions.front()); + _functions.pop(); + } + } + + assert(fn); + fn->run(); + } +} + +void WorkQueue::enqueue(std::unique_ptr<IFunction> &&fn) +{ + { + std::unique_lock<std::mutex> lock{_mu}; + _functions.emplace(std::move(fn)); + } + _cv.notify_one(); +} + +void WorkQueue::terminate() +{ + { + std::unique_lock<std::mutex> lock{_mu}; + _state = State::FORCE_FINISHING; + } + _cv.notify_all(); +} + +void WorkQueue::finish() +{ + { + std::unique_lock<std::mutex> lock{_mu}; + _state = State::FINISHING; + } + _cv.notify_all(); +} + +uint32_t WorkQueue::numJobsInQueue() +{ + std::unique_lock<std::mutex> lock{_mu}; + return _functions.size(); +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/WorkQueue.h b/runtime/neurun/core/src/exec/WorkQueue.h new file mode 100644 index 000000000..cdbadfb8f --- /dev/null +++ b/runtime/neurun/core/src/exec/WorkQueue.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_WORK_QUEUE_H__ +#define __NEURUN_EXEC_WORK_QUEUE_H__ + +#include <condition_variable> +#include <memory> +#include <mutex> +#include <queue> + +#include "exec/IFunction.h" + +namespace neurun +{ +namespace exec +{ + +class WorkQueue +{ +public: + enum class State + { + ONLINE, + FINISHING, + FORCE_FINISHING + }; + +public: + /** + * @brief Create WorkQueue object + */ + WorkQueue() = default; + /** + * @brief Destroy WorkQueue object + */ + ~WorkQueue(); + /** + * @brief Thread entry function + */ + void operator()(); + /** + * @brief Push the given Task to the job queue + * + * @param fn Function to be executed(a job) + */ + void enqueue(std::unique_ptr<IFunction> &&fn); + /** + * @brief Flag as terminating so all the worker threads can terminate + */ + void terminate(); + /** + * @brief Flag as finishing so worker threads can finish remaining jobs and then terminate + */ + void finish(); + /** + * @brief Check if it has pending jobs. 
Even if this returns false, WorkQueue threads may still be + * running + * + * @return Number of jobs in the queue + */ + uint32_t numJobsInQueue(); + +private: + State _state{State::ONLINE}; + std::queue<std::unique_ptr<IFunction>> _functions; + std::mutex _mu; + std::condition_variable _cv; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_WORK_QUEUE_H__ diff --git a/runtime/neurun/core/src/exec/interp/Buffer.h b/runtime/neurun/core/src/exec/interp/Buffer.h new file mode 100644 index 000000000..d60b59a2f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Buffer.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Buffer.h + * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class + */ +#ifndef __NEURUN_EXEC_INTERP_BUFFER_H__ +#define __NEURUN_EXEC_INTERP_BUFFER_H__ + +#include <cpp14/memory.h> + +#include "ir/Data.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Interface for writable data area + */ +class Buffer : public ir::Data +{ +public: + /** + * @brief Return writable pointer for data area + * @return Writable pointer + */ + virtual uint8_t *baseWritable(void) const = 0; +}; + +/** + * @brief Class for internally allocated data area + */ +class InternalBuffer final : public Buffer +{ +public: + InternalBuffer(size_t size) : _base{nnfw::cpp14::make_unique<uint8_t[]>(size)}, _size{size} + { + // DO NOTHING + } + +public: + size_t size(void) const override { return _size; } + const uint8_t *base(void) const override { return _base.get(); } + uint8_t *baseWritable(void) const override { return _base.get(); } + +private: + std::unique_ptr<uint8_t[]> _base; + size_t _size; +}; + +/** + * @brief Class for data area from outside + */ +class ExternalBuffer final : public Buffer +{ +public: + ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size} + { + // DO NOTHING + } + +public: + size_t size(void) const override { return _size; } + const uint8_t *base(void) const override { return _base; } + uint8_t *baseWritable(void) const override { return _base; } + +private: + uint8_t *_base; + size_t _size; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_BUFFER_H__ diff --git a/runtime/neurun/core/src/exec/interp/ExecEnv.h b/runtime/neurun/core/src/exec/interp/ExecEnv.h new file mode 100644 index 000000000..0f7d45e2a --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/ExecEnv.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file ExecEnv.h + * @brief This file contains ExecEnv to access interpreter tensor and execution status + */ +#ifndef __NEURUN_EXEC_INTERP_EXEC_ENV_H_ +#define __NEURUN_EXEC_INTERP_EXEC_ENV_H_ + +#include <unordered_set> + +#include "ir/Graph.h" +#include "Tensor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class to gather interpreter execution environment + * Each interpreter instance own execution environment + */ +class ExecEnv +{ +public: + /** + * @brief Construct a new Exec Env object (deleted) + */ + ExecEnv(void) = delete; + /** + * @brief Construct a new ExecEnv object + * @param[in] graph Graph to execute by interpreter + */ + explicit ExecEnv(const ir::Graph &graph) : _graph(graph) + { + // DO NOTHING + } + +public: + /** + * @brief Return graph to execute + * @return Graph + */ + const ir::Graph &graph(void) const { return _graph; } + /** + * @brief Assign tensor to environment which have allocated or assigned buffer + * @param[in] index Tensor index + * @param[in] tensor Tensor + */ + void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor) + { + assert(tensor->bufferRO() != nullptr); + _tensors.emplace(index, tensor); + } + + /** + * @brief Return tensor pointer in environment + * @param[in] index Tensor index + * @return Tensor pointer + */ + const ITensor *tensorAt(const ir::OperandIndex index) 
const { return _tensors.at(index).get(); } + + /** + * @brief Check environment contains tensor + * @param[in] index Tensor index + * @return @c true if environment contain tensor, otherwise @c false + */ + bool contains(const ir::OperandIndex index) const + { + return (_tensors.find(index) != _tensors.end()); + } + + /** + * @brief Allocate tensor using operand info + * @param[in] index Tensor index + * @param[in] info Operand info + * @note If already allocated, just return + * @TODO More smart allocation policy + */ + void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info) + { + // already allocated, or constant + if (contains(index)) + { + return; + } + + auto tensor = std::make_shared<Tensor>(info); + tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size())); + assignTensor(index, tensor); + _buffers.insert(index); + } + + /** + * @brief Allocate read-only tensor and share data with other tensor + * @param[in] index Tensor index + * @param[in] info Operand info + * @param[in] index_to_share Tensor index that have data to share + */ + void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info, + const ir::OperandIndex index_to_share) + { + if (!contains(index_to_share)) + { + throw std::runtime_error{"Cannot find tensor to share data"}; + } + + // already allocated + if (contains(index)) + { + return; + } + else + { + auto tensor = std::make_shared<ROTensor>(info); + tensor->setData(tensorAt(index_to_share)->shareData()); + assignTensor(index, tensor); + _buffers.insert(index); + } + } + + /** + * @brief Free buffer if allocated by allocateIfNeed + * @param[in] index Tensor index + * @note If allocated by outside, just return + */ + void freeIfAllocated(const ir::OperandIndex index) + { + if (_buffers.find(index) != _buffers.end()) + { + _tensors.at(index)->releaseData(); + } + } + +private: + const ir::Graph &_graph; + // Tensor map to use in interpreter + // It should map tensors that have 
allocated or assigned buffer pointer + std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors; + // Tensors allocated by allocateIfNeed (buffer) + std::unordered_set<ir::OperandIndex> _buffers; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_EXEC_ENV_H_ diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.cc b/runtime/neurun/core/src/exec/interp/ExecManager.cc new file mode 100644 index 000000000..92f182c06 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/ExecManager.cc @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ExecManager.h" +#include "ExecEnv.h" +#include "Interpreter.h" + +#include "util/logging.h" + +#include <cpp14/memory.h> + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +void ExecManager::execute(const IODescription &desc) +{ + /************************************************************************ + * Prepare execution model (submodel) + It may execute divided model + but now consider model inference is done at interpreter + ***********************************************************************/ + ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map; + + for (uint32_t n = 0; n < _graph.getInputs().size(); n++) + { + ir::IOIndex index{n}; + const auto input_index = _graph.getInputs().at(index); + const auto &input = *desc.inputs.at(n); + + auto input_tensor = std::make_shared<ROTensor>(input.info); + input_tensor->setData(std::make_shared<const ir::ExternalData>( + reinterpret_cast<const uint8_t *>(input.buffer), input.size)); + tensor_map[input_index] = input_tensor; + } + + for (uint32_t n = 0; n < _graph.getOutputs().size(); n++) + { + ir::IOIndex index{n}; + const auto output_index = _graph.getOutputs().at(index); + const auto &output = *desc.outputs.at(n); + + auto output_tensor = std::make_shared<Tensor>(output.info); + output_tensor->setBuffer( + std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output.buffer), output.size)); + tensor_map[output_index] = output_tensor; + } + + /************************************************************************ + * Prepare execution environment + Execution environment will be assigned to invoked interpreter instance + ***********************************************************************/ + + std::unique_ptr<ExecEnv> interp_env = nnfw::cpp14::make_unique<ExecEnv>(_graph); + + // Assign input tensor into interpreter execution environment + for (auto index : _graph.getInputs()) + { + if (tensor_map.find(index) != tensor_map.end()) + { + VERBOSE(INTERPRETER) << "Assign 
input tensor. operand index:" << index.value() << std::endl; + interp_env->assignTensor(index, tensor_map.at(index)); + } + } + + // Assign output tensor into interpreter execution environment + for (auto index : _graph.getOutputs()) + { + if (tensor_map.find(index) != tensor_map.end()) + { + VERBOSE(INTERPRETER) << "Assign output tensor. operand index: " << index.value() << std::endl; + interp_env->assignTensor(index, tensor_map.at(index)); + } + } + + // Allocate constant tensor + _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + if (obj.isConstant()) + { + VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value() + << std::endl; + + auto const_tensor = std::make_shared<ROTensor>(obj.info()); + // Assume that interpreter's tensor layout is same with model (NHWC) + const_tensor->setData( + std::make_shared<ir::ExternalData>(obj.data().base(), obj.info().total_size())); + interp_env->assignTensor(ind, const_tensor); + } + }); + + /***************************************************************************** + * Invoke interpreter + ****************************************************************************/ + + Interpreter interp(std::move(interp_env)); + interp.run(); + + /***************************************************************************** + * Invoked interpreter run is finished + ****************************************************************************/ + + // If interpreter execute submodel + // 1. Get tensor output of submodel into tensor_map to save result + // 2. 
Generate new ExecEnv for next interpretation +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.h b/runtime/neurun/core/src/exec/interp/ExecManager.h new file mode 100644 index 000000000..f952abf02 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/ExecManager.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file ExecManager.h + * @brief This file contains ExecManager class\n + * to manage interpreter execution and environment + */ +#ifndef __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ +#define __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ + +#include "ir/Graph.h" +#include "exec/IExecutor.h" +#include "Tensor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class to execute model using interpreter + */ +class ExecManager final : public IExecutor +{ +public: + explicit ExecManager(const ir::Graph &graph) : _graph(graph) + { + // DO NOTHING + } + +public: + /** + * @brief Return graph object + * @return Graph object + */ + const ir::Graph &graph() final { return _graph; } + void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{ + // Not implemented + }; + /** + * @brief Start execution + * @note It should be called after setting input and output buffer + */ + void execute(const IODescription &desc) final; + +private: + const ir::Graph &_graph; + ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ diff --git a/runtime/neurun/core/src/exec/interp/Interpreter.cc b/runtime/neurun/core/src/exec/interp/Interpreter.cc new file mode 100644 index 000000000..8373419f6 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Interpreter.cc @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Interpreter.h" + +#include <stack> +#include <unordered_set> + +#include "Registration.h" + +#include "ir/OperandIndexMap.h" +#include "util/logging.h" +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +// TODO more structured execution kernel implementation +// TODO use cker for execution +// TODO divide tensor prepare and execution +// TODO introduce memory manager (buffer allocate and free) +class OperationExecutor : ir::OperationVisitor +{ +public: + OperationExecutor(ExecEnv *env) : _env{env} + { + _kernels[ir::OpCode::Add] = getAdd(); + _kernels[ir::OpCode::Sub] = getSub(); + _kernels[ir::OpCode::Mul] = getMul(); + _kernels[ir::OpCode::Conv2D] = getConv2D(); + _kernels[ir::OpCode::MaxPool2D] = getMaxPool2D(); + _kernels[ir::OpCode::Concat] = getConcat(); + _kernels[ir::OpCode::Gather] = getGather(); + _kernels[ir::OpCode::AvgPool2D] = getAvgPool2D(); + _kernels[ir::OpCode::FullyConnected] = getFullyConnected(); + _kernels[ir::OpCode::InstanceNorm] = getInstanceNorm(); + _kernels[ir::OpCode::Softmax] = getSoftMax(); + _kernels[ir::OpCode::Reshape] = getReshape(); + _kernels[ir::OpCode::DepthwiseConv2D] = getDepthwiseConv(); + _kernels[ir::OpCode::TransposeConv] = getTransposeConv(); + _kernels[ir::OpCode::Logistic] = getLogistic(); + _kernels[ir::OpCode::Pad] = getPad(); + _kernels[ir::OpCode::ReLU] = getReLU(); + _kernels[ir::OpCode::ReLU1] = getReLU1(); + _kernels[ir::OpCode::ReLU6] = getReLU6(); + _kernels[ir::OpCode::Tanh] = getTanh(); + } + + void execute(const ir::OperationIndex &idx) + { + const auto nodeName = _env->graph().operations().at(idx).name(); + VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName + << " operation (id: " << idx.value() << ")" << std::endl; + _env->graph().operations().at(idx).accept(*this); + } + +private: +#define OP(InternalName) \ 
+ void visit(const ir::operation::InternalName &node) override \ + { \ + if (_kernels[ir::OpCode::InternalName]->prepare != nullptr) \ + { \ + _kernels[ir::OpCode::InternalName]->prepare(_env, node); \ + } \ + _kernels[ir::OpCode::InternalName]->invoke(_env, node); \ + } +#include "ir/Operations.lst" +#undef OP + +private: + ExecEnv *_env; + std::unordered_map<ir::OpCode, OpKernel *> _kernels; +}; + +void Interpreter::run() +{ + VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl; + + // operand_stack: save operands prepared to use + std::stack<ir::OperandIndex> operand_stack; + + // Note: We should push input first, then constant. + // We use use-def for find operators ready to execution, + // but Use-Def cannot handle parameters (maybe constant, but not always) + // Note: If all model inputs are constant, it may not work (depend on tensors' order). + // But that scenario may not exist + for (auto ind : _env->graph().getInputs()) + { + VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl; + + operand_stack.push(ind); + } + + _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + if (obj.isConstant()) + { + VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl; + + operand_stack.push(ind); + } + }); + + // Execution + std::unordered_set<ir::OperandIndex> ready_check; + std::unordered_set<ir::OperationIndex> executed; + OperationExecutor executor{_env.get()}; + while (!operand_stack.empty()) + { + const auto current_operand_index = operand_stack.top(); + operand_stack.pop(); + VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value() + << " is checked ready to use" << std::endl; + + assert(ready_check.find(current_operand_index) == ready_check.end()); + ready_check.insert(current_operand_index); + + // Find prepared operations by scan use of current operand + std::stack<ir::OperationIndex> operation_stack; + auto use_operators = 
std::list<ir::OperationIndex>( + _env->graph().operands().at(current_operand_index).getUses().list()); + // Remove operation index duplication + // If one operation uses same operand tensor for multiple input, + // use-list have duplicated operation index + use_operators.unique(); + for (auto use_operator : use_operators) + { + // Assumption: all parameters are ready to use + bool operator_ready = true; + for (auto input_index : _env->graph().operations().at(use_operator).getInputs()) + { + if (ready_check.find(input_index) == ready_check.end()) + { + operator_ready = false; + break; + } + } + + if (operator_ready) + { + VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl; + operation_stack.push(use_operator); + } + } + + while (!operation_stack.empty()) + { + const auto current_operation_index = operation_stack.top(); + operation_stack.pop(); + VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "(" + << _env->graph().operations().at(current_operation_index).name() << ")" + << std::endl; + + // execution + // 1. Prepare output tensor + // 2. Call operation kernel + executor.execute(current_operation_index); + executed.insert(current_operation_index); + + // 3. Push each output into operand stack + const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs(); + for (auto def_operand : def_operands) + { + VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value() + << std::endl; + operand_stack.push(def_operand); + } + + // 4. 
Free if lifetime of buffer operands used by input is finished + for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs()) + { + const auto use_operators = _env->graph().operands().at(input_index).getUses(); + bool dead_buffer = true; + for (auto use_operator : use_operators.list()) + { + if (executed.find(use_operator) == executed.end()) + { + dead_buffer = false; + break; + } + } + + if (dead_buffer) + { + _env->freeIfAllocated(input_index); + } + } + } + } +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/Interpreter.h b/runtime/neurun/core/src/exec/interp/Interpreter.h new file mode 100644 index 000000000..1b73592b3 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Interpreter.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Interpreter.h + * @brief This file contains Interpreter class for interpretation + */ +#ifndef __NEURUN_EXEC_INTERP_INTERPRETER_H__ +#define __NEURUN_EXEC_INTERP_INTERPRETER_H__ + +#include "ExecEnv.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class for interpretation + */ +class Interpreter +{ + +public: + /** + * @brief Construct a new Interpreter object (deleted) + */ + Interpreter() = delete; + /** + * @brief Construct a new Interpreter object + * @param[in] env Execution environment variable for interpreter object + */ + Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)} + { + // DO NOTHING + } + +public: + /** + * @brief Run interpreter until there is no operation to execute + */ + void run(); + +private: + std::unique_ptr<ExecEnv> _env; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_INTERPRETER_H__ diff --git a/runtime/neurun/core/src/exec/interp/Registration.h b/runtime/neurun/core/src/exec/interp/Registration.h new file mode 100644 index 000000000..3ebe3bc9f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Registration.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_INTERP_REGISTRATION_H__ +#define __NEURUN_EXEC_INTERP_REGISTRATION_H__ + +#include "ExecEnv.h" + +#include "ir/Operation.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +struct OpKernel +{ + std::function<void(ExecEnv *, const ir::Operation &)> prepare; + std::function<void(const ExecEnv *, const ir::Operation &)> invoke; +}; + +// Defined in operations/ directory +OpKernel *getAdd(); +OpKernel *getSub(); +OpKernel *getMul(); +OpKernel *getConv2D(); +OpKernel *getMaxPool2D(); +OpKernel *getConcat(); +OpKernel *getGather(); +OpKernel *getAvgPool2D(); +OpKernel *getFullyConnected(); +OpKernel *getInstanceNorm(); +OpKernel *getSoftMax(); +OpKernel *getDepthwiseConv(); +OpKernel *getReshape(); +OpKernel *getTransposeConv(); +OpKernel *getLogistic(); +OpKernel *getPad(); +OpKernel *getReLU(); +OpKernel *getReLU1(); +OpKernel *getReLU6(); +OpKernel *getTanh(); + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_REGISTRATION_H__ diff --git a/runtime/neurun/core/src/exec/interp/Tensor.cc b/runtime/neurun/core/src/exec/interp/Tensor.cc new file mode 100644 index 000000000..5c1da3587 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Tensor.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Tensor.h" + +#define NO_USE(a) (void)(a) + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +void ITensor::access(const std::function<void(backend::operand::ITensor &tensor)> &fn) +{ + fn(*this); +} + +size_t ROTensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + NO_USE(coords); + throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); +} + +size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + NO_USE(coords); + throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); +} + +ir::Layout ROTensor::layout() const +{ + // TODO Changes to return frontend layout + return ir::Layout::NHWC; +} + +ir::Layout Tensor::layout() const +{ + // TODO Changes to return frontend layout + return ir::Layout::NHWC; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/Tensor.h b/runtime/neurun/core/src/exec/interp/Tensor.h new file mode 100644 index 000000000..c53fd46a6 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Tensor.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Tensor.h + * @brief This file contains ITensor interface, ROTensor class, and Tensor class + */ +#ifndef __NEURUN_EXEC_INTERP_TENSOR_H__ +#define __NEURUN_EXEC_INTERP_TENSOR_H__ + +#include "Buffer.h" + +#include "ir/OperandInfo.h" +#include "backend/operand/ITensor.h" +#include "ir/Layout.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Interface to handle Tensor in interpreter + */ +class ITensor : public backend::operand::ITensor +{ +public: + virtual ~ITensor() = default; + +public: + virtual uint8_t *buffer() const = 0; + /** + * @brief Return shared pointer for buffer + * @return Buffer shared pointer + */ + virtual std::shared_ptr<const Buffer> shareBuffer() const = 0; + /** + * @brief Return read-only buffer pointer + * @return Read-only buffer pointer + */ + virtual const uint8_t *bufferRO() const = 0; + /** + * @brief Return shared pointer for data + * @return Data shared pointer + */ + virtual std::shared_ptr<const ir::Data> shareData() const = 0; + /** + * @brief Set internal/external buffer + * @param[in] buffer Buffer pointer + */ + virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0; + /** + * @brief Set data reference (including constant, input) + * @param[in] data Data pointer + */ + virtual void setData(std::shared_ptr<const ir::Data> data) = 0; + virtual void releaseData() = 0; + + virtual size_t total_size() const = 0; + virtual size_t dimension(size_t index) const = 0; + virtual size_t num_dimensions() const = 0; + virtual size_t calcOffset(const util::Coordinates &coords) const = 0; + + virtual bool has_padding() const = 0; + /** + * @brief Return data type of tensor + * @return Data type of tensor + */ + virtual ir::DataType data_type() const = 0; + /** + * @brief Return TensorInfo + * @return TensorInfo + */ + virtual const ir::OperandInfo &tensorInfo() const = 0; + /** + * @brief Return number of elements + * @return Number of elements + */ + virtual uint64_t 
num_elements() const = 0; + void access(const std::function<void(backend::operand::ITensor &tensor)> &fn) final; +}; + +/** + * @brief Class to handle tensor in interpreter as read-only + */ +class ROTensor final : public ITensor +{ +public: + ROTensor() = delete; + ROTensor(const ir::OperandInfo &info) : _info(info) + { + // DO NOTHING + } + +public: + uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; } + std::shared_ptr<const Buffer> shareBuffer() const override + { + throw std::runtime_error{"Read only tensor"}; + } + const uint8_t *bufferRO() const override { return _data->base(); } + std::shared_ptr<const ir::Data> shareData() const override { return _data; } + void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; } + void setData(std::shared_ptr<const ir::Data> data) override { _data = data; } + void releaseData() override { _data = nullptr; } + + size_t total_size() const override { return _info.total_size(); } + size_t dimension(size_t index) const override { return _info.shape().dim(index); } + size_t num_dimensions() const override { return _info.shape().rank(); } + size_t calcOffset(const util::Coordinates &coords) const override; + ir::Layout layout() const override; + bool has_padding() const override { return false; } + ir::DataType data_type() const override { return _info.typeInfo().type(); } + const ir::OperandInfo &tensorInfo() const override { return _info; } + uint64_t num_elements() const override { return _info.shape().num_elements(); }; + +private: + const ir::OperandInfo _info; + std::shared_ptr<const ir::Data> _data{nullptr}; +}; + +/** + * @brief Class to handle tensor in interpreter as writable + */ +class Tensor final : public ITensor +{ +public: + Tensor() = delete; + Tensor(const ir::OperandInfo &info) : _info(info) + { + // DO NOTHING + } + +public: + uint8_t *buffer() const override { return _buffer->baseWritable(); } + std::shared_ptr<const Buffer> shareBuffer() const override 
{ return _buffer; }; + const uint8_t *bufferRO() const override { return _buffer->base(); } + std::shared_ptr<const ir::Data> shareData() const override { return _buffer; } + void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; } + void setData(std::shared_ptr<const ir::Data>) override + { + throw std::runtime_error{"Passed data may read-only"}; + } + void releaseData() override { _buffer = nullptr; } + + size_t total_size() const override { return _info.total_size(); } + size_t dimension(size_t index) const override { return _info.shape().dim(index); } + size_t num_dimensions() const override { return _info.shape().rank(); } + size_t calcOffset(const util::Coordinates &coords) const override; + ir::Layout layout() const override; + bool has_padding() const override { return false; } + ir::DataType data_type() const override { return _info.typeInfo().type(); } + const ir::OperandInfo &tensorInfo() const override { return _info; } + uint64_t num_elements() const override { return _info.shape().num_elements(); }; + +private: + const ir::OperandInfo _info; + std::shared_ptr<const Buffer> _buffer{nullptr}; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_TENSOR_H__ diff --git a/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc new file mode 100644 index 000000000..bd396491f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cker/operation/AveragePool.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "ir/operation/AvgPool2D.h"
#include "util/Utils.h"
#include "util/Padding.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace avgpool2d
{

// Validate the 4D input and allocate the output tensor, inferring the output
// shape from input shape + pooling params when the model left it unspecified
void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
{
  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  const auto in_tensor = env->tensorAt(in_index);
  UNUSED_RELEASE(in_tensor);

  assert(in_tensor->num_dimensions() == 4);

  const auto output_info = env->graph().operands().at(out_index).info();
  if (output_info.total_size() == 0)
  {
    // Handle unspecified output shape
    const auto &avgpool_node =
        nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
    const auto infered_output_shapes =
        shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
    env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
  }
  else
  {
    env->allocateIfNeeded(out_index, output_info);
  }

  auto out_tensor = env->tensorAt(out_index);
  UNUSED_RELEASE(out_tensor);

  // Handle same ifm & ofm data type only
  assert(in_tensor->data_type() == out_tensor->data_type());
  assert(out_tensor->num_dimensions() == 4);
}

// Run float32 average pooling via cker (NHWC layout assumed)
void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
            const ir::operation::AvgPool2D::Param &param)
{
  // TODO Support NCHW frontend
  const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
                                                      param.stride, param.kw, param.kh);
  // Calculate
  nnfw::cker::PoolParams cker_param;
  calculateActivationRange(param.activation, &cker_param.float_activation_min,
                           &cker_param.float_activation_max);
  cker_param.filter_width = param.kw;
  cker_param.filter_height = param.kh;
  cker_param.padding_values.width = padding.left;
  cker_param.padding_values.height = padding.top;
  cker_param.stride_width = param.stride.horizontal;
  cker_param.stride_height = param.stride.vertical;

  const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
  const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
  float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());

  nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
}

// Dispatch on data type; only FLOAT32 is implemented
void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
{
  const auto &avgpool_node =
      nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);

  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  // Fetch pooling input/output tensors
  // (original comment "Check lhs shape is same with rhs (with broadcast)" was a
  //  copy-paste from a binary-arithmetic kernel and did not apply here)
  const auto in_tensor = env->tensorAt(in_index);
  const auto out_tensor = env->tensorAt(out_index);

  const auto data_type = in_tensor->data_type();
  if (data_type == ir::DataType::FLOAT32)
  {
    invoke(in_tensor, out_tensor, avgpool_node.param());
  }
  else
  {
    throw std::runtime_error{"NYI: Support float only"};
  }
}
} // namespace avgpool2d

OpKernel *getAvgPool2D()
{
  static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
  return &kernel;
}

} // namespace interp
}
// namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc new file mode 100644 index 000000000..16469b9db --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/BinaryArithmeticOps.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Add.h" +#include "ir/operation/Sub.h" +#include "ir/operation/Mul.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +enum class OpType +{ + ADD, + SUB, + MUL +}; + +template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node) +{ + const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); + + const auto lhs_index = node.getInputs().at(add_node.LHS); + const auto rhs_index = node.getInputs().at(add_node.RHS); + const auto out_index = node.getOutputs().at(0); + + const auto lhs_tensor = env->tensorAt(lhs_index); + const auto rhs_tensor = env->tensorAt(rhs_index); + + // Check shape and type lhs is same with rhs + // TODO Util function to compare TensorInfo + if (lhs_tensor->data_type() != rhs_tensor->data_type()) + { + throw 
std::runtime_error{"Interp(Add): Different input types"}; + } + + bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()); + if (try_broadcast) + { + bool success = true; + auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(), + rhs_tensor->tensorInfo().shape(), success); + if (!success) + { + throw std::runtime_error{"Interp(Add): Fail to brodcasting"}; + } + + auto output_info = ir::OperandInfo(out_shape, lhs_tensor->tensorInfo().typeInfo()); + // We can handle already allocated (ex. model output) + env->allocateIfNeeded(out_index, output_info); + } + else + { + // Output's shape and type is same with input + auto output_info = lhs_tensor->tensorInfo(); + // We can handle already allocated (ex. model output) + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + // Check shape and type lhs is same with output + // TODO Util function to compare TensorInfo + if (lhs_tensor->data_type() != out_tensor->data_type()) + { + throw std::runtime_error{"Interp(Add): Invalid output type"}; + } +} + +inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params) +{ + params->float_activation_min = min; + params->float_activation_max = max; +} + +inline void setActivationParams(int32_t min, int32_t max, + nnfw::cker::BinaryArithmeticOpParam *params) +{ + params->quantized_activation_min = min; + params->quantized_activation_max = max; +} + +template <typename raw_type, typename param_type, OpType op_type> +void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor, + const param_type ¶m) +{ + const auto lhs_buffer = lhs_tensor->bufferRO(); + const auto rhs_buffer = rhs_tensor->bufferRO(); + auto out_buffer = out_tensor->buffer(); + + nnfw::cker::BinaryArithmeticOpParam cker_param; + raw_type activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + 
setActivationParams(activation_min, activation_max, &cker_param); + const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer); + const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer); + raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer); + + // Calculate + const std::function<raw_type(const raw_type &, const raw_type &)> fn_add = + [](const raw_type &a, const raw_type &b) { return a + b; }; + const std::function<raw_type(const raw_type &, const raw_type &)> fn_sub = + [](const raw_type &a, const raw_type &b) { return a - b; }; + const std::function<raw_type(const raw_type &, const raw_type &)> fn_mul = + [](const raw_type &a, const raw_type &b) { return a * b; }; + + const std::function<raw_type(const raw_type &, const raw_type &)> fn = + (op_type == OpType::ADD) ? fn_add : ((op_type == OpType::SUB) ? fn_sub : fn_mul); + + if (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()) + { + const auto lhs_shape = convertExtendShape(lhs_tensor->tensorInfo().shape()); + const auto rhs_shape = convertExtendShape(rhs_tensor->tensorInfo().shape()); + const auto out_shape = convertExtendShape(out_tensor->tensorInfo().shape()); + nnfw::cker::BroadcastBinaryArithmeticOpSlow(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, + out_shape, out_ptr, fn); + return; + } + + const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape()); + const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + nnfw::cker::BinaryArithmeticOp(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape, + out_ptr, fn); +} + +template <typename node_type, typename param_type, OpType op_type> +void invokeAdd(const ExecEnv *env, const ir::Operation &node) +{ + const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); + + const auto lhs_index = node.getInputs().at(arithmetic_node.LHS); + const auto rhs_index = 
node.getInputs().at(arithmetic_node.RHS); + const auto out_index = node.getOutputs().at(0); + const auto lhs_tensor = env->tensorAt(lhs_index); + const auto rhs_tensor = env->tensorAt(rhs_index); + const auto out_tensor = env->tensorAt(out_index); + const auto data_type = lhs_tensor->data_type(); + + if (data_type == ir::DataType::INT32) + { + invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, + arithmetic_node.param()); + } + else if (data_type == ir::DataType::FLOAT32) + { + invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param()); + } + else + { + throw std::runtime_error{"NYI: Unsupported data type"}; + } +} +} // namespace add + +OpKernel *getAdd() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Add>, + invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>}; + return &kernel; +} + +OpKernel *getSub() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Sub>, + invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>}; + return &kernel; +} + +OpKernel *getMul() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Mul>, + invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Concat.cc b/runtime/neurun/core/src/exec/interp/operations/Concat.cc new file mode 100644 index 000000000..a127e5f30 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Concat.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cker/operation/Concatenation.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "ir/operation/Concat.h"
#include "misc/polymorphic_downcast.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace concat
{

// Validate that all inputs agree in rank/type and in every dimension except the
// concat axis, then allocate the output with the accumulated axis dimension
void prepareConcat(ExecEnv *env, const ir::Operation &node)
{
  const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);

  const auto first_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  const auto first_tensor = env->tensorAt(first_index);
  uint32_t out_axis_dimension = 0;
  const int32_t axis_raw = concat_node.param().axis;
  // Negative axis counts from the back (normalized against input rank)
  const uint32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->num_dimensions()) : axis_raw;

  // All inputs shape should be same except axis dimension
  // All inputs type should be same
  for (auto input : node.getInputs())
  {
    assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions());
    assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
    for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
    {
      if (i == axis)
      {
        out_axis_dimension += env->tensorAt(input)->dimension(i);
        continue;
      }
      assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i));
    }
  }

  // Make output tensor info using first input tensor info, and accumulated axis dimension value
  auto out_shape = first_tensor->tensorInfo().shape();
  out_shape.dim(axis) = out_axis_dimension;
  env->allocateIfNeeded(out_index,
                        ir::OperandInfo{out_shape, first_tensor->tensorInfo().typeInfo()});

  auto out_tensor = env->tensorAt(out_index);
  UNUSED_RELEASE(out_tensor);

  // Output shape should be same with input except axis dimension
  // Output type should be same with input
  assert(first_tensor->data_type() == out_tensor->data_type());
  for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
  {
    if (i == axis)
    {
      continue;
    }
    assert(first_tensor->dimension(i) == out_tensor->dimension(i));
  }
}

// Run float32 concatenation via cker
void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis)
{
  const uint32_t count = in_tensors.size();

  // Calculate
  nnfw::cker::ConcatenationParams cker_param;
  cker_param.axis = (int8_t)axis;
  cker_param.inputs_count = count;

  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());

  std::vector<nnfw::cker::Shape> in_shapes;
  std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
  // reserve() up front keeps &in_shapes[i] stable while in_shape_ptrs is filled
  in_shapes.reserve(count);
  in_shape_ptrs.reserve(count);
  std::vector<const float *> in_ptrs;
  for (uint32_t i = 0; i < count; i++)
  {
    in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
    in_shape_ptrs.push_back(&in_shapes[i]);
    in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
  }

  auto out_buffer = out_tensor->buffer();
  float *out_ptr = reinterpret_cast<float *>(out_buffer);

  nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
                                   out_ptr);
}

// Gather input tensors, normalize the axis against the output rank, dispatch on type
void invokeConcat(const ExecEnv *env, const ir::Operation &node)
{
  const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
  const int32_t axis_raw = concat_node.param().axis;

  std::vector<const ITensor *> in_tensors;
  for (const auto &e : concat_node.getInputs())
  {
    in_tensors.emplace_back(env->tensorAt(e));
  }

  const auto out_index = node.getOutputs().at(0);
  const auto out_tensor = env->tensorAt(out_index);
  const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw;

  const auto data_type = in_tensors[0]->data_type();
  if (data_type == ir::DataType::FLOAT32)
  {
    invoke(in_tensors, out_tensor, axis);
  }
  else
  {
    throw std::runtime_error{"NYI: Support float32 only"};
  }
}
} // namespace concat

OpKernel *getConcat()
{
  static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc
new file mode 100644
index 000000000..5242247a4
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc
@@ -0,0 +1,152 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cker/operation/Conv.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "ir/operation/Conv2D.h"
#include "util/Utils.h"
#include "util/Padding.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace conv2d
{

// Validate input/kernel/bias ranks and allocate the output tensor, inferring
// the output shape from input + kernel + conv params when unspecified
void prepareConv2D(ExecEnv *env, const ir::Operation &node)
{
  const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
  const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
  const auto out_index = node.getOutputs().at(0);

  const auto in_tensor = env->tensorAt(in_index);
  const auto kernel_tensor = env->tensorAt(kernel_index);
  const auto bias_tensor = env->tensorAt(bias_index);

  assert(in_tensor->num_dimensions() == 4);
  assert(kernel_tensor->num_dimensions() == 4);
  assert(bias_tensor->num_dimensions() == 1);

  UNUSED_RELEASE(in_tensor);
  UNUSED_RELEASE(kernel_tensor);
  UNUSED_RELEASE(bias_tensor);

  const auto output_info = env->graph().operands().at(out_index).info();
  if (output_info.total_size() == 0)
  {
    // Handle unspecified output shape
    const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
    const auto infered_output_shapes = shape_inference::inferConv2DShape(
        in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
    env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
  }
  else
  {
    env->allocateIfNeeded(out_index, output_info);
  }

  auto out_tensor = env->tensorAt(out_index);
  UNUSED_RELEASE(out_tensor);

  // Handle same ifm & ofm data type only
  assert(in_tensor->data_type() == out_tensor->data_type());
  assert(out_tensor->num_dimensions() == 4);
}

// Run float32 2D convolution via cker (NHWC layout assumed)
void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
            const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
{
  // TODO Support NCHW frontend (typo "frontned" fixed)
  const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = ker_tensor->tensorInfo().shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
                                                      param.stride, ker_width, ker_height);

  // Calculate
  float activation_min, activation_max;
  calculateActivationRange(param.activation, &activation_min, &activation_max);

  nnfw::cker::ConvParams cker_param;
  cker_param.padding_values.width = padding.left;
  cker_param.padding_values.height = padding.top;
  cker_param.stride_width = param.stride.horizontal;
  cker_param.stride_height = param.stride.vertical;
  cker_param.dilation_width_factor = 1;
  cker_param.dilation_height_factor = 1;
  cker_param.float_activation_min = activation_min;
  cker_param.float_activation_max = activation_max;

  const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
  const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
  const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
  const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
  const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
  const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
  const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
  float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());

  nnfw::cker::Conv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
                   bias_ptr, cker_ofm_shape, ofm_ptr);
}

// Dispatch on data type; only FLOAT32 is implemented
void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
{
  const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);

  const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
  const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
  const auto ofm_index = node.getOutputs().at(0);

  const auto ifm_tensor = env->tensorAt(ifm_index);
  const auto ker_tensor = env->tensorAt(ker_index);
  const auto bias_tensor = env->tensorAt(bias_index);
  const auto ofm_tensor = env->tensorAt(ofm_index);

  const auto data_type = ifm_tensor->data_type();
  if (data_type == ir::DataType::FLOAT32)
  {
    invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
  }
  else
  {
    throw std::runtime_error{"NYI: Support float32 only"};
  }
}
} // namespace conv2d

OpKernel *getConv2D()
{
  static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc b/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc
new file mode 100644
index 000000000..1d3649f48
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc
@@ -0,0 +1,159 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/DepthwiseConv.h> +#include <misc/polymorphic_downcast.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/DepthwiseConv2D.h" +#include "util/Padding.h" +#include "util/Utils.h" +#include "util/ShapeInference.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +namespace +{ + +void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT); + const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + assert(in_tensor->num_dimensions() == 4); + assert(kernel_tensor->num_dimensions() == 4); + assert(bias_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + // TODO handle unspecified output shape: + // calculate output shape using ifm shape, kernel shape, padding, stride + const auto output_info = env->graph().operands().at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output 
shape + const auto &depth_conv_node = + nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node); + const auto infered_output_shapes = shape_inference::inferDepthwiseConv2DShape( + in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), + depth_conv_node.param()); + env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param ¶m) +{ + // TODO Support NCHW frontend + const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
+ const auto &ker_shape = ker_tensor->tensorInfo().shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, ker_width, ker_height); + + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::DepthwiseConvParams cker_param; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.depth_multiplier = param.multiplier; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + cker_param.dilation_width_factor = 1; + cker_param.dilation_height_factor = 1; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); + const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); + const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO()); + float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); + + nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node) +{ + const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT); + const auto ker_index = 
node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} + +} // namespace + +OpKernel *getDepthwiseConv() +{ + static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc new file mode 100644 index 000000000..9c1c5d4e2 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/FullyConnected.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/FullyConnected.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace fc +{ + +void prepareFC(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT); + const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + assert(in_tensor->num_dimensions() >= 2); + assert(kernel_tensor->num_dimensions() == 2); + assert(bias_tensor->num_dimensions() == 1); + + const auto input_size_with_batch = in_tensor->num_elements(); + const auto num_units = kernel_tensor->dimension(0); + const auto input_size = kernel_tensor->dimension(1); + const auto batch_size = input_size_with_batch / input_size; + assert(input_size_with_batch % input_size == 0); + assert(num_units == bias_tensor->dimension(0)); + + // Make output tensor info + ir::Shape output_shape(2); + output_shape.dim(0) = batch_size; + output_shape.dim(1) = num_units; + const ir::OperandInfo out_info{output_shape, in_tensor->tensorInfo().typeInfo()}; + env->allocateIfNeeded(out_index, out_info); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 2); + assert(out_tensor->dimension(0) == batch_size); + assert(out_tensor->dimension(1) == num_units); +} + +void invoke(const 
ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param ¶m) +{ + const auto ifm_buffer = ifm_tensor->bufferRO(); + const auto ker_buffer = ker_tensor->bufferRO(); + const auto bias_buffer = bias_tensor->bufferRO(); + auto ofm_buffer = ofm_tensor->buffer(); + + // Calculate + nnfw::cker::FullyConnectedParams cker_param; + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + const auto cker_ifm_shape = convertExtendShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertExtendShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertExtendShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertExtendShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer); + const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer); + const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer); + float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer); + + nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeFC(const ExecEnv *env, const ir::Operation &node) +{ + const auto &conv_node = + nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto 
data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float only"}; + } +} +} // namespace fc + +OpKernel *getFullyConnected() +{ + static OpKernel kernel = {fc::prepareFC, fc::invokeFC}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Gather.cc b/runtime/neurun/core/src/exec/interp/operations/Gather.cc new file mode 100644 index 000000000..8b64d1937 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Gather.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/Gather.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Gather.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareGather(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT); + const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto indices_tensor = env->tensorAt(indices_index); + + // TODO handle unspecified output shape: + // calculate output shape using ifm shape, kernel shape, padding, stride + const auto output_info = env->graph().operands().at(output_index).info(); + if (output_info.total_size() == 0) + { + throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"}; + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + if (indices_tensor->data_type() != ir::DataType::INT32) + { + throw std::runtime_error{"Interp(Gather): Invalid indices data type"}; + } + + auto output_tensor = env->tensorAt(output_index); + auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1; + + if (output_rank != output_tensor->num_dimensions()) + { + throw std::runtime_error{"Interp(Gather): Invalid output rank"}; + } + if (output_tensor->data_type() != input_tensor->data_type()) + { + throw std::runtime_error{"Interp(Gather): Invalid output data type"}; + } + + if (input_tensor->data_type() == ir::DataType::QUANT8_ASYMM && + input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo()) + { + throw std::runtime_error{ + "Interp(Gather): Cannot handle different I/O QUANT8_ASYMM scale/offset"}; + } +} + +template <typename raw_type> +void invoke(const ITensor *input_tensors, const ITensor *indices_tensors, + const ITensor 
*output_tensor, uint32_t axis) +{ + // Calculate + nnfw::cker::GatherParams cker_param; + cker_param.axis = (int8_t)axis; + + const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape()); + const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO()); + const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO()); + raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer()); + + nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape, + indices_ptr, cker_output_shape, output_ptr); +} + +void invokeGather(const ExecEnv *env, const ir::Operation &node) +{ + const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node); + const int32_t axis_raw = gather_node.param().axis; + + const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT); + const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto indices_tensor = env->tensorAt(indices_index); + const auto output_tensor = env->tensorAt(output_index); + const uint32_t axis = (axis_raw < 0) ? 
(axis_raw + input_tensor->num_dimensions()) : axis_raw; + + const auto data_type = input_tensor->data_type(); + + switch (data_type) + { + case ir::DataType::FLOAT32: + invoke<float>(input_tensor, indices_tensor, output_tensor, axis); + break; + case ir::DataType::INT32: + invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis); + break; + case ir::DataType::QUANT8_ASYMM: + invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis); + break; + default: + throw std::runtime_error{"Interp(Gather): NYI - Not supported type"}; + } +} + +} // namespace concat + +OpKernel *getGather() +{ + static OpKernel kernel = {prepareGather, invokeGather}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc new file mode 100644 index 000000000..d1623d53c --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/InstanceNorm.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/InstanceNorm.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace instancenorm +{ + +void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node) +{ + const auto &instancenorm_node = + nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node); + + const auto input_index = node.getInputs().at(instancenorm_node.INPUT); + const auto output_index = node.getOutputs().at(0); + const auto input_tensor = env->tensorAt(input_index); + + if (input_tensor->num_dimensions() != 4) + { + throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"}; + } + + // Output shape should be same with input + env->allocateIfNeeded(output_index, input_tensor->tensorInfo()); + + auto output_tensor = env->tensorAt(output_index); + UNUSED_RELEASE(output_tensor); + + // Handle same ifm & ofm data type only + assert(input_tensor->data_type() == output_tensor->data_type()); + assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape()); +} + +inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params) +{ + params->float_activation_min = min; + params->float_activation_max = max; +} + +void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor, + const ITensor *output_tensor, const ir::operation::InstanceNorm::Param ¶m) +{ + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::InstanceNormParams cker_param; + cker_param.epsilon = param.epsilon; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_gamma_shape = 
convertShape(gamma_tensor->tensorInfo().shape()); + const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO()); + const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO()); + const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO()); + float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer()); + + nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr, + cker_beta_shape, beta_ptr, cker_output_shape, output_ptr); +} + +void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node) +{ + const auto &instancenorm_node = + nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node); + + const auto input_index = node.getInputs().at(instancenorm_node.INPUT); + const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA); + const auto beta_index = node.getInputs().at(instancenorm_node.BETA); + const auto out_index = node.getOutputs().at(0); + const auto input_tensor = env->tensorAt(input_index); + const auto gamma_tensor = env->tensorAt(gamma_index); + const auto beta_tensor = env->tensorAt(beta_index); + const auto out_tensor = env->tensorAt(out_index); + const auto data_type = input_tensor->data_type(); + + if (data_type == ir::DataType::FLOAT32) + { + invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param()); + } + else + { + throw std::runtime_error{"NYI: Unsupported data type"}; + } +} +} // namespace instancenorm + +OpKernel *getInstanceNorm() +{ + static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Logistic.cc 
b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc new file mode 100644 index 000000000..2fc68ffd2 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/Logistic.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Logistic.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareLogistic(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + + const auto output_info = env->graph().operands().at(output_index).info(); + + // Check shape and type lhs is same with rhs + // TODO Util function to compare TensorInfo + if (output_info.total_size() == 0) + { + throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + const auto output_tensor = env->tensorAt(output_index); + if (input_tensor->data_type() != output_tensor->data_type()) + { + throw std::runtime_error{"Interp(Logistic): Invalid output type"}; + } +} + +void invoke(const ITensor *input_tensor, const ITensor *output_tensor) +{ + const auto input_buffer = 
input_tensor->bufferRO(); + auto output_buffer = output_tensor->buffer(); + + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const float *input_ptr = reinterpret_cast<const float *>(input_buffer); + float *output_ptr = reinterpret_cast<float *>(output_buffer); + + nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr); +} + +void invokeLogistic(const ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto output_tensor = env->tensorAt(output_index); + + const auto data_type = input_tensor->data_type(); + + if (data_type == ir::DataType::FLOAT32) + { + invoke(input_tensor, output_tensor); + } + else + { + throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"}; + } +} +} // namespace + +OpKernel *getLogistic() +{ + static OpKernel kernel = {prepareLogistic, invokeLogistic}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc new file mode 100644 index 000000000..3e1711d8e --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/MaxPool.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/MaxPool2D.h" +#include "util/Utils.h" +#include "util/Padding.h" +#include "util/ShapeInference.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + + assert(in_tensor->num_dimensions() == 4); + UNUSED_RELEASE(in_tensor); + + const auto output_info = env->graph().operands().at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output shape + const auto &maxpool_node = + nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node); + const auto infered_output_shapes = + shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param()); + env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *in_tensor, const ITensor *out_tensor, + const ir::operation::MaxPool2D::Param ¶m) +{ + // TODO support NCHW frontend + 
const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, param.kw, param.kh); + // Calculate + nnfw::cker::PoolParams cker_param; + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + cker_param.filter_width = param.kw; + cker_param.filter_height = param.kh; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + + nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr); +} + +void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node) +{ + const auto &maxpool_node = + nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + const auto data_type = in_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(in_tensor, out_tensor, maxpool_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} +} // namespace + +OpKernel *getMaxPool2D() +{ + static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git 
a/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h new file mode 100644 index 000000000..5f4146bb8 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ +#define __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ + +#include "ir/Shape.h" +#include "ir/InternalType.h" + +#include <cker/Shape.h> + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +inline nnfw::cker::Shape convertShape(const ir::Shape &shape) +{ + auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); + + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = dimensions[i]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape) +{ + auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); + + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + uint32_t start = 4 - dimensions.size(); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i < start) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = dimensions[i - start]; 
+ } + } + + return nnfw::cker::GetShape(raw_shape); +} + +template <typename T> +void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + throw std::runtime_error{"Unsupported activation type"}; + } +} + +inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success) +{ + int lhs_rank = lhs.rank(); + int rhs_rank = rhs.rank(); + + int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank); + ir::Shape out_shape(out_rank); + + int lhs_idim = lhs_rank - 1; + int rhs_idim = rhs_rank - 1; + success = true; + for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--) + { + if (lhs_idim == -1 && rhs_idim == -1) + { + // invalid result + success = false; + break; + } + + if (lhs_idim == -1) + { + out_shape.dim(out_idim) = rhs.dim(rhs_idim); + rhs_idim--; + } + else if (rhs_idim == -1) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + } + else + { + if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim)) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + rhs_idim--; + } + else if (lhs.dim(lhs_idim) == 1) + { + out_shape.dim(out_idim) = rhs.dim(rhs_idim); + lhs_idim--; + rhs_idim--; + } + else if (rhs.dim(rhs_idim) == 1) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + rhs_idim--; + } + else + { + // invalid result + success = false; + break; + } + } + } + + if (lhs_idim != -1 || rhs_idim != -1) + { + // invalid result + success = false; + } + return out_shape; +} + +} 
// namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ diff --git a/runtime/neurun/core/src/exec/interp/operations/Pad.cc b/runtime/neurun/core/src/exec/interp/operations/Pad.cc new file mode 100644 index 000000000..0c8267a90 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Pad.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/Pad.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Pad.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void preparePad(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + + const auto output_info = env->graph().operands().at(output_index).info(); + + // Check shape and type lhs is same with rhs + // TODO Util function to compare TensorInfo + if (output_info.total_size() == 0) + { + throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"}; + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + const auto output_tensor = env->tensorAt(output_index); + if (input_tensor->data_type() != output_tensor->data_type()) + { + throw std::runtime_error{"Interp(Pad): Invalid output type"}; + } +} + +void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor) +{ + const auto input_buffer = input_tensor->bufferRO(); + const auto pad_buffer = pad_tensor->bufferRO(); + auto output_buffer = output_tensor->buffer(); + + int32_t pad_rank = pad_tensor->dimension(0); + + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const float *input_ptr = reinterpret_cast<const float *>(input_buffer); + const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer); + float *output_ptr = reinterpret_cast<float *>(output_buffer); + + nnfw::cker::Pad(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape, output_ptr, + nullptr); +} + +void invokePad(const ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT); + const auto pad_index = 
node.getInputs().at(ir::operation::Pad::PAD); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto pad_tensor = env->tensorAt(pad_index); + const auto output_tensor = env->tensorAt(output_index); + + const auto data_type = input_tensor->data_type(); + + if (data_type == ir::DataType::FLOAT32) + { + invoke(input_tensor, pad_tensor, output_tensor); + } + else + { + throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"}; + } +} +} // namespace + +OpKernel *getPad() +{ + static OpKernel kernel = {preparePad, invokePad}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Reshape.cc b/runtime/neurun/core/src/exec/interp/operations/Reshape.cc new file mode 100644 index 000000000..a160232de --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Reshape.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "exec/interp/Registration.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepare(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + // Unspecified shape is not supported in operation node spec now + const auto output_info = env->graph().operands().at(out_index).info(); + env->allocateAndShareIfNeeded(out_index, output_info, in_index); + + assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size()); +} + +void invoke(const ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO()) + { + // Same data + return; + } + + const auto output_info = env->graph().operands().at(out_index).info(); + memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(), + output_info.total_size()); +} + +} // namespace {anonymous} + +OpKernel *getReshape() +{ + static OpKernel kernel = {prepare, invoke}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc b/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc new file mode 100644 index 000000000..91d98889f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/SoftMax.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Softmax.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta, + float *out) +{ + assert(input_size > 0); + + // For each batch + for (int b = 0; b < batch_size; b++) + { + // Find the max coeff. + float max_coeff = in[0]; + for (int i = 1; i < input_size; i++) + { + if (in[i] > max_coeff) + max_coeff = in[i]; + } + + // Compute the normalized sum of exps. + float exp_sum = 0.0; + for (int i = 0; i < input_size; i++) + { + out[i] = std::exp((in[i] - max_coeff) * beta); + exp_sum += out[i]; + } + + // Divide by the sum of exps. + float reciprocal_sum_exp = 1.f / exp_sum; + for (int i = 0; i < input_size; i++) + { + out[i] *= reciprocal_sum_exp; + } + + // Advance in and out pointers for the next batch. 
+ in += input_size; + out += input_size; + } +} + +void prepareSoftMax(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + UNUSED_RELEASE(in_tensor); + + assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2)); + + // Output shape should be same with input + // Output type is pre-defined in model + const auto output_shape = env->graph().operands().at(in_index).info().shape(); + const auto output_type = env->graph().operands().at(out_index).info().typeInfo(); + + const ir::OperandInfo output_info{output_shape, output_type}; + env->allocateIfNeeded(out_index, output_info); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Check output shape is same with input + // NOTE(fix): was comparing out_tensor's rank to itself (always true); compare input vs output rank + assert(in_tensor->num_dimensions() == out_tensor->num_dimensions()); + for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++) + { + assert(in_tensor->dimension(i) == out_tensor->dimension(i)); + } +} + +void invoke(const ITensor *in_tensor, const ITensor *out_tensor, + const ir::operation::Softmax::Param &param) +{ + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + + float beta = param.beta; + + if (in_tensor->num_dimensions() == 2) + { + uint32_t batch_size = in_tensor->dimension(0); + uint32_t input_size = in_tensor->dimension(1); + + Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr); + } + else if (in_tensor->num_dimensions() == 4) + { + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + + nnfw::cker::SoftmaxParams cker_param; + cker_param.beta = beta; + + nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr); + } + else + { + throw std::runtime_error{"Unsupported input dimension: support 2D or 
4D"}; + } +} + +void invokeSoftMax(const ExecEnv *env, const ir::Operation &node) +{ + const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + const auto in_data_type = in_tensor->data_type(); + const auto out_data_type = out_tensor->data_type(); + if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32)) + { + invoke(in_tensor, out_tensor, softmax_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} + +} // namespace + +OpKernel *getSoftMax() +{ + static OpKernel kernel = {prepareSoftMax, invokeSoftMax}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc new file mode 100644 index 000000000..70b72c88d --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/TransposeConv.h> +#include <misc/polymorphic_downcast.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/TransposeConv.h" +#include "util/Padding.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareTransposeConv(ExecEnv *env, const ir::Operation &node) +{ + const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index); + + assert(ifm_tensor->num_dimensions() == 4); + assert(ker_tensor->num_dimensions() == 4); + assert(ofm_shape_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(ifm_tensor); + UNUSED_RELEASE(ker_tensor); + UNUSED_RELEASE(ofm_shape_tensor); + + const auto output_info = env->graph().operands().at(ofm_index).info(); + if (output_info.total_size() == 0) + { + // TODO: Handle unspecified output shape + throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; + } + else + { + env->allocateIfNeeded(ofm_index, output_info); + } + + auto ofm_tensor = env->tensorAt(ofm_index); + UNUSED_RELEASE(ofm_tensor); + + // Handle same ifm & ofm data type only + if (ifm_tensor->data_type() != ofm_tensor->data_type()) + { + throw std::runtime_error{"Interp(TConv): Different I/O data type"}; + } + + if (ofm_tensor->num_dimensions() != 4) + { + throw std::runtime_error{"Interp(TConv): Invalid output rank"}; + } +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor, + const ir::operation::TransposeConv::Param &param) +{ + const auto ifm_shape = 
ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto ker_shape = ker_tensor->tensorInfo().shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(param.padding, ofm_shape, ifm_shape, + param.stride, ker_width, ker_height); + + nnfw::cker::TransposeConvParams cker_param; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + cker_param.dilation_width_factor = 1; + cker_param.dilation_height_factor = 1; + + const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); + const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); + float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); + + nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_ofm_shape, ofm_ptr); +} + +void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node) +{ + const auto &tconv_node = + nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const 
auto data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param()); + } + else + { + throw std::runtime_error{"Interp(TConv): Support float32 only"}; + } +} + +} // namespace + +OpKernel *getTransposeConv() +{ + static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc new file mode 100644 index 000000000..116806fc4 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cmath> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" + +#include "ir/operation/ReLU.h" +#include "ir/operation/ReLU1.h" +#include "ir/operation/ReLU6.h" +#include "ir/operation/Tanh.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +enum class ActivationType +{ + ReLU, + ReLU1, + ReLU6, + Tanh +}; + +void prepare(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + + const auto output_info = env->graph().operands().at(output_index).info(); + if (output_info.total_size() == 0) + { + // Output's shape and type is same with input + auto input_info = input_tensor->tensorInfo(); + // We can handle already allocated (ex. model output) + env->allocateIfNeeded(output_index, input_info); + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + const auto output_tensor = env->tensorAt(output_index); + // Check shape and type lhs is same with output + // TODO Util function to compare TensorInfo + if (input_tensor->data_type() != output_tensor->data_type()) + { + throw std::runtime_error{"Interp(Activations): Invalid output type"}; + } +} + +template <ActivationType act_type> +void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements) +{ + std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); }; + switch (act_type) + { + case ActivationType::ReLU: + fn = [](const float &in) { return std::max(0.f, in); }; + break; + case ActivationType::ReLU1: + fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); }; + break; + case ActivationType::ReLU6: + fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); }; + break; + case ActivationType::Tanh: + fn = [](const float &in) { return std::tanh(in); }; + break; + default: + throw std::runtime_error{"Interp(Activations): 
NYI - Unsupported activation"}; + break; + } + + const float *input_end = input_ptr + num_elements; + for (; input_ptr < input_end; input_ptr++, output_ptr++) + { + *output_ptr = fn(*input_ptr); + } +} + +template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + // Check lhs shape is same with rhs (with broadcast) + const auto input_tensor = env->tensorAt(input_index); + const auto output_tensor = env->tensorAt(output_index); + + const auto data_type = input_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + uint64_t elements = input_tensor->num_elements(); + const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO()); + float *out = reinterpret_cast<float *>(output_tensor->buffer()); + + evalFloat<act_type>(input_start, out, elements); + } + else + { + throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"}; + } +} + +} // namespace + +OpKernel *getReLU() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>}; + return &kernel; +} + +OpKernel *getReLU1() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>}; + return &kernel; +} + +OpKernel *getReLU6() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>}; + return &kernel; +} + +OpKernel *getTanh() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun |