summaryrefslogtreecommitdiff
path: root/runtime/neurun/core/src/exec
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/neurun/core/src/exec')
-rw-r--r--runtime/neurun/core/src/exec/DataflowExecutor.cc176
-rw-r--r--runtime/neurun/core/src/exec/DataflowExecutor.h97
-rw-r--r--runtime/neurun/core/src/exec/Execution.cc135
-rw-r--r--runtime/neurun/core/src/exec/ExecutionObservee.cc64
-rw-r--r--runtime/neurun/core/src/exec/ExecutionObservee.h56
-rw-r--r--runtime/neurun/core/src/exec/ExecutionObservers.cc130
-rw-r--r--runtime/neurun/core/src/exec/ExecutorBase.cc145
-rw-r--r--runtime/neurun/core/src/exec/ExecutorBase.h127
-rw-r--r--runtime/neurun/core/src/exec/FunctionSequence.cc62
-rw-r--r--runtime/neurun/core/src/exec/FunctionSequence.h56
-rw-r--r--runtime/neurun/core/src/exec/Job.cc33
-rw-r--r--runtime/neurun/core/src/exec/Job.h69
-rw-r--r--runtime/neurun/core/src/exec/LinearExecutor.cc39
-rw-r--r--runtime/neurun/core/src/exec/LinearExecutor.h64
-rw-r--r--runtime/neurun/core/src/exec/ParallelExecutor.cc147
-rw-r--r--runtime/neurun/core/src/exec/ParallelExecutor.h69
-rw-r--r--runtime/neurun/core/src/exec/ParallelScheduler.cc55
-rw-r--r--runtime/neurun/core/src/exec/ParallelScheduler.h60
-rw-r--r--runtime/neurun/core/src/exec/Sink.h205
-rw-r--r--runtime/neurun/core/src/exec/Source.h211
-rw-r--r--runtime/neurun/core/src/exec/ThreadPool.cc65
-rw-r--r--runtime/neurun/core/src/exec/ThreadPool.h73
-rw-r--r--runtime/neurun/core/src/exec/WorkQueue.cc104
-rw-r--r--runtime/neurun/core/src/exec/WorkQueue.h87
-rw-r--r--runtime/neurun/core/src/exec/interp/Buffer.h94
-rw-r--r--runtime/neurun/core/src/exec/interp/ExecEnv.h165
-rw-r--r--runtime/neurun/core/src/exec/interp/ExecManager.cc125
-rw-r--r--runtime/neurun/core/src/exec/interp/ExecManager.h71
-rw-r--r--runtime/neurun/core/src/exec/interp/Interpreter.cc210
-rw-r--r--runtime/neurun/core/src/exec/interp/Interpreter.h67
-rw-r--r--runtime/neurun/core/src/exec/interp/Registration.h63
-rw-r--r--runtime/neurun/core/src/exec/interp/Tensor.cc59
-rw-r--r--runtime/neurun/core/src/exec/interp/Tensor.h180
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc129
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc202
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Concat.cc150
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Conv2D.cc152
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc159
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc137
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Gather.cc141
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc124
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Logistic.cc102
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc128
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/OperationUtil.h177
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Pad.cc109
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Reshape.cc66
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/SoftMax.cc163
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc145
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc156
49 files changed, 5603 insertions, 0 deletions
diff --git a/runtime/neurun/core/src/exec/DataflowExecutor.cc b/runtime/neurun/core/src/exec/DataflowExecutor.cc
new file mode 100644
index 000000000..e22d41031
--- /dev/null
+++ b/runtime/neurun/core/src/exec/DataflowExecutor.cc
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DataflowExecutor.h"
+
+#include <cassert>
+
+#include "util/logging.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief Sum the ranks of every operation in an op sequence
+ *
+ * Returns 0 when no rank table has been set. An operation that is missing from the
+ * table is asserted to be a Permute and contributes int32_t max to the sum.
+ */
+int64_t DataflowExecutor::calculateRank(const std::vector<ir::Element> &operations)
+{
+  if (!_indexed_ranks)
+    return 0;
+
+  int64_t total = 0;
+  for (const auto &op : operations)
+  {
+    auto found = _indexed_ranks->find(op.index);
+    if (found != _indexed_ranks->end())
+    {
+      total += found->second;
+      continue;
+    }
+    assert(op.node->opcode() == ir::OpCode::Permute);
+    // int32_t max (rather than int64_t max) keeps the int64_t sum from overflowing
+    total += std::numeric_limits<int32_t>::max();
+  }
+  return total;
+}
+
+// Moves waiting job `id` into the ready queue, keyed by the rank of its op sequence.
+void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id)
+{
+  auto &waiting = _waiting_jobs[id];
+  assert(waiting != nullptr);
+  auto subg_index = _job_to_op_seq[waiting->index()];
+  auto &op_seq = _graph.subgraphs()->at(subg_index);
+  auto priority = calculateRank(op_seq.operations());
+  _ready_jobs.emplace(priority, std::move(waiting));
+}
+
+// Decrements the pending-input counter of every job recorded as depending on the
+// finished job, and queues each job whose counter reaches zero.
+void DataflowExecutor::notify(uint32_t finished_job_id)
+{
+  for (auto dependent : _output_info[finished_job_id])
+  {
+    auto &pending = _input_info[dependent];
+    assert(pending > 0);
+    --pending;
+    if (pending != 0)
+      continue;
+    // No dependent jobs left, ready for execution
+    emplaceToReadyJobs(dependent);
+  }
+}
+// True when every slot of _waiting_jobs is null.
+bool DataflowExecutor::noWaitingJobs()
+{
+  for (const std::unique_ptr<Job> &slot : _waiting_jobs)
+  {
+    if (slot != nullptr)
+      return false;
+  }
+  return true;
+}
+
+/**
+ * @brief Build one Job per op sequence and record the dependencies between jobs
+ *
+ * All jobs are created in #_finished_jobs; executeImpl() swaps them into
+ * #_waiting_jobs at the start of a run.
+ */
+DataflowExecutor::DataflowExecutor(const ir::Graph &graph,
+ const std::shared_ptr<compiler::OperandContext> &operand_context,
+ std::unique_ptr<backend::TensorManagerSet> tensor_mgrs,
+ CodeMap &&code_map)
+ : ExecutorBase{graph, operand_context, std::move(tensor_mgrs)}, _code_map{std::move(code_map)}
+{
+ VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
+
+ const ir::Subgraphs *subgraphs = _graph.subgraphs();
+ // Assign a job index (uint32_t) to each SubgraphIndex and create its Job
+ uint32_t next_job_index = 0;
+ std::unordered_map<ir::SubgraphIndex, uint32_t> subgraph_to_job;
+ subgraphs->iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &) {
+ VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with SubgraphIndex "
+ << subg_index.value() << std::endl;
+ // The Job gets a non-owning pointer; the function sequence stays owned by _code_map
+ _finished_jobs.emplace_back(
+ nnfw::cpp14::make_unique<Job>(next_job_index, _code_map.at(subg_index).get()));
+ subgraph_to_job[subg_index] = next_job_index++;
+ });
+
+ // One slot per job in each per-job table
+ _waiting_jobs.resize(next_job_index);
+ _output_info.resize(next_job_index);
+ _initial_input_info.resize(next_job_index, 0);
+
+ // For every output operand of a job, find each op sequence that takes that operand as
+ // an input: the job of that op sequence depends on this job
+ subgraphs->iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &subg) {
+ auto job_index = subgraph_to_job[subg_index];
+ for (auto output : subg.getOutputs())
+ {
+ // Update output and input info
+ subgraphs->iterate(
+ [&](const ir::SubgraphIndex &subg_cur_index, const ir::OpSequence &subg_cur) {
+ if (subg_cur.getInputs().contains(output))
+ {
+ auto dep_index = subgraph_to_job[subg_cur_index];
+ // One more dependency the dependent job has to wait for
+ ++_initial_input_info[dep_index];
+ _output_info[job_index].push_back(dep_index);
+ }
+ });
+ }
+ });
+ // Reverse lookup: job index -> SubgraphIndex
+ for (const auto &s : subgraph_to_job)
+ _job_to_op_seq.emplace(s.second, s.first);
+
+ // Working counters start from the initial dependency counts
+ _input_info = _initial_input_info;
+}
+
+/**
+ * @brief Run every job once, each after the jobs it depends on
+ *
+ * Jobs with no pending inputs are queued in #_ready_jobs; the loop repeatedly takes the
+ * first entry of that queue, runs it, and lets notify() queue the jobs it unblocks.
+ */
+void DataflowExecutor::executeImpl()
+{
+ assert(noWaitingJobs());
+
+ // Execution setup
+ _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs
+
+ // Seed the ready queue with the jobs that have no dependencies
+ for (uint32_t i = 0; i < _waiting_jobs.size(); ++i)
+ {
+ if (_input_info[i] == 0)
+ {
+ emplaceToReadyJobs(i);
+ }
+ }
+ assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
+ bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
+
+ _subject.notifyModelBegin(this);
+
+ while (!_ready_jobs.empty())
+ {
+ // Take ownership of the first ready job and remove its (now empty) entry
+ auto job = std::move((_ready_jobs.begin())->second);
+ _ready_jobs.erase(_ready_jobs.begin());
+ auto job_index = job->index();
+ VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl;
+
+ auto subgraph_index = _job_to_op_seq[job_index];
+ auto op_seq = &_graph.subgraphs()->at(subgraph_index);
+ const backend::Backend *backend =
+ _graph.getLowerInfo()->operation.at(subgraph_index)->backend();
+
+ _subject.notifyJobBegin(this, op_seq, backend);
+
+ // In profiling mode the job's function is run through runSync() instead of Job::run()
+ if (is_profiling)
+ job->fn()->runSync();
+ else
+ job->run();
+
+ _subject.notifyJobEnd(this, op_seq, backend);
+ // Unblock dependent jobs, then put this job back in its slot for the next run
+ notify(job_index);
+ _finished_jobs[job_index] = std::move(job);
+ }
+ assert(noWaitingJobs());
+
+ _subject.notifyModelEnd(this);
+
+ // Reset input info for the next execution
+ _input_info = _initial_input_info;
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/DataflowExecutor.h b/runtime/neurun/core/src/exec/DataflowExecutor.h
new file mode 100644
index 000000000..6c12093fd
--- /dev/null
+++ b/runtime/neurun/core/src/exec/DataflowExecutor.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__
+#define __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__
+
+#include <list>
+#include <map>
+#include <unordered_map>
+
+#include "FunctionSequence.h"
+#include "Job.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/Index.h"
+#include "cpp14/memory.h"
+#include "exec/ExecutorBase.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief Executor that runs one job per op sequence, each after the jobs it depends on
+ */
+class DataflowExecutor : public ExecutorBase
+{
+public:
+ /// @brief Compiled function sequence for each op sequence, keyed by SubgraphIndex
+ using CodeMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<FunctionSequence>>;
+
+protected:
+ /// @brief Called after a job finishes, with the index of that job
+ virtual void notify(uint32_t finished_job_id);
+ /// @brief Whether #_waiting_jobs holds no job
+ bool noWaitingJobs();
+
+public:
+ /**
+ * @brief Constructs a DataflowExecutor object
+ *
+ * @param graph Graph object
+ * @param operand_context (Only for input/output operand data access)
+ * @param tensor_mgrs Tensor manager set, passed on to ExecutorBase
+ * @param code_map Compiled code map
+ */
+ DataflowExecutor(const ir::Graph &graph,
+ const std::shared_ptr<compiler::OperandContext> &operand_context,
+ std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map);
+
+ void executeImpl() override;
+
+protected:
+ /// @brief Compute the priority key used in #_ready_jobs for the given operations
+ int64_t calculateRank(const std::vector<ir::Element> &operations);
+ /// @brief Move the job with index @c id from #_waiting_jobs to #_ready_jobs
+ void emplaceToReadyJobs(const uint32_t &id);
+
+protected:
+ CodeMap _code_map;
+ /**
+ * @brief A vector of finished jobs for current execution
+ * After a run it has all the jobs of this execution for the next run
+ */
+ std::vector<std::unique_ptr<Job>> _finished_jobs;
+ /**
+ * @brief A vector of waiting jobs for current execution
+ * All the jobs are moved from #_finished_jobs to it when start a run
+ */
+ std::vector<std::unique_ptr<Job>> _waiting_jobs;
+ /**
+ * @brief Jobs' output info
+ * Used for notifying after finishing a job
+ */
+ std::vector<std::list<uint32_t>> _output_info;
+ /// @brief Per-job dependency count computed at construction
+ std::vector<uint32_t> _initial_input_info;
+ /// @brief Per-job count of dependencies still pending; reset from #_initial_input_info
+ std::vector<uint32_t> _input_info;
+ /**
+ * @brief A collection of jobs that are ready for execution
+ * Jobs in it are ready to be scheduled.
+ * Ordered by priority from `_indexed_ranks`
+ */
+ std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs;
+
+ /// @brief Which job runs which op and function.
+ std::unordered_map<uint32_t, ir::SubgraphIndex> _job_to_op_seq;
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__
diff --git a/runtime/neurun/core/src/exec/Execution.cc b/runtime/neurun/core/src/exec/Execution.cc
new file mode 100644
index 000000000..bc7bbd160
--- /dev/null
+++ b/runtime/neurun/core/src/exec/Execution.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Execution.h"
+
+#include "util/logging.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+// Stores the executor and sizes the I/O descriptor tables to the number of graph inputs
+// and outputs.
+Execution::Execution(const std::shared_ptr<IExecutor> &executor) : _executor{executor}
+{
+  const auto num_inputs = _executor->graph().getInputs().size();
+  _io_desc.inputs.resize(num_inputs);
+
+  const auto num_outputs = _executor->graph().getOutputs().size();
+  _io_desc.outputs.resize(num_outputs);
+}
+
+// TODO Remove default parameter
+// Registers a user buffer for input `index`, described by the graph's own operand info.
+// Throws std::runtime_error when `length` is smaller than the operand's total size.
+void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length,
+                         ir::Layout layout)
+{
+  const auto operand_index = graph().getInputs().at(index);
+  const auto operand_info = graph().operands().at(operand_index).info();
+
+  if (length < operand_info.total_size())
+    throw std::runtime_error{"Too small length"};
+
+  _io_desc.inputs.at(index.value()) =
+      nnfw::cpp14::make_unique<InputDesc>(operand_info, buffer, length, layout);
+}
+
+// TODO Remove default parameter
+// Registers a user buffer for input `index`, described by the caller-supplied type and
+// shape. Throws std::runtime_error when `length` is smaller than that info's total size.
+void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
+                         const void *buffer, size_t length, ir::Layout layout)
+{
+  const ir::OperandInfo user_info{shape, type};
+
+  if (length < user_info.total_size())
+    throw std::runtime_error{"Too small length"};
+
+  _io_desc.inputs.at(index.value()) =
+      nnfw::cpp14::make_unique<InputDesc>(user_info, buffer, length, layout);
+}
+
+// TODO Remove default parameter
+// Registers a user buffer for output `index`, described by the graph's own operand info.
+// Throws std::runtime_error when `length` is smaller than the operand's total size.
+void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout)
+{
+  const auto operand_index = graph().getOutputs().at(index);
+  const auto operand_info = graph().operands().at(operand_index).info();
+
+  if (length < operand_info.total_size())
+    throw std::runtime_error{"Too small length"};
+
+  _io_desc.outputs.at(index.value()) =
+      nnfw::cpp14::make_unique<OutputDesc>(operand_info, buffer, length, layout);
+}
+
+// TODO Remove default parameter
+// Registers a user buffer for output `index`, described by the caller-supplied type and
+// shape. Throws std::runtime_error when `length` is smaller than that info's total size.
+void Execution::setOutput(const ir::IOIndex &index, const ir::TypeInfo &type,
+                          const ir::Shape &shape, void *buffer, size_t length, ir::Layout layout)
+{
+  const ir::OperandInfo user_info{shape, type};
+
+  if (length < user_info.total_size())
+    throw std::runtime_error{"Too small length"};
+
+  _io_desc.outputs.at(index.value()) =
+      nnfw::cpp14::make_unique<OutputDesc>(user_info, buffer, length, layout);
+}
+
+/**
+ * @brief Change the layout of an input that has already been set
+ *
+ * Rebuilds the descriptor with the same info, buffer and size but the new layout.
+ *
+ * @param index  Input index
+ * @param layout New layout
+ * @throw std::runtime_error if no input has been set at @c index
+ */
+void Execution::setInputLayout(const ir::IOIndex &index, ir::Layout layout)
+{
+  const auto &input_desc = _io_desc.inputs.at(index.value());
+  // Slots are null after the constructor's resize() until setInput() fills them
+  if (input_desc == nullptr)
+  {
+    throw std::runtime_error{"Input is not set"};
+  }
+  _io_desc.inputs.at(index.value()) = nnfw::cpp14::make_unique<InputDesc>(
+      input_desc->info, input_desc->buffer, input_desc->size, layout);
+}
+
+/**
+ * @brief Change the layout of an output that has already been set
+ *
+ * Rebuilds the descriptor with the same info, buffer and size but the new layout.
+ *
+ * @param index  Output index
+ * @param layout New layout
+ * @throw std::runtime_error if no output has been set at @c index
+ */
+void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout)
+{
+  const auto &output_desc = _io_desc.outputs.at(index.value());
+  // Slots are null after the constructor's resize() until setOutput() fills them
+  if (output_desc == nullptr)
+  {
+    throw std::runtime_error{"Output is not set"};
+  }
+  _io_desc.outputs.at(index.value()) = nnfw::cpp14::make_unique<OutputDesc>(
+      output_desc->info, output_desc->buffer, output_desc->size, layout);
+}
+
+// Runs the executor with the current I/O description on the calling thread and marks the
+// execution as finished once the executor returns.
+void Execution::execute()
+{
+ VERBOSE(Execution) << "Start execution" << std::endl;
+
+ _executor->execute(_io_desc);
+ finished = true;
+
+ VERBOSE(Execution) << "Execution finished" << std::endl;
+}
+
+// Starts execute() on a new thread stored in _exec_thread; waitFinish() joins it.
+// NOTE(review): the thread is given `this`, so this object presumably has to outlive the
+// thread - confirm that callers always call waitFinish().
+void Execution::startExecute()
+{
+ VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl;
+
+ _exec_thread = nnfw::cpp14::make_unique<std::thread>(&Execution::execute, this);
+}
+
+/**
+ * @brief Block until the thread created by startExecute() has finished
+ *
+ * @throw std::runtime_error if no execution thread has been created
+ */
+void Execution::waitFinish()
+{
+  VERBOSE(Execution) << "Wait to finish execution" << std::endl;
+
+  // _exec_thread stays null until startExecute() assigns it; do not dereference null
+  if (_exec_thread == nullptr)
+  {
+    throw std::runtime_error{"Asynchronous execution is not started"};
+  }
+  _exec_thread->join();
+  finished = true;
+}
+
+// Returns the `finished` flag, which execute() and waitFinish() set to true.
+// NOTE(review): with startExecute() the flag is written on the worker thread; whether it
+// is an atomic is not visible here - confirm in the header.
+bool Execution::isFinished(void) const { return finished; }
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ExecutionObservee.cc b/runtime/neurun/core/src/exec/ExecutionObservee.cc
new file mode 100644
index 000000000..3b342d703
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ExecutionObservee.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionObservee.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+// Takes ownership of the observer and appends it to the end of the observer list.
+void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer)
+{
+  _observers.push_back(std::move(observer));
+}
+
+// Calls handleBegin(executor) on every registered observer, in list order.
+void ExecutionObservee::notifyModelBegin(IExecutor *executor)
+{
+  for (auto it = _observers.begin(); it != _observers.end(); ++it)
+    (*it)->handleBegin(executor);
+}
+
+// Calls handleEnd(executor) on every registered observer, in list order.
+void ExecutionObservee::notifyModelEnd(IExecutor *executor)
+{
+  for (auto it = _observers.begin(); it != _observers.end(); ++it)
+    (*it)->handleEnd(executor);
+}
+
+// Calls handleBegin(executor, op_seq, backend) on every registered observer, in list order.
+void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+                                       const backend::Backend *backend)
+{
+  for (auto it = _observers.begin(); it != _observers.end(); ++it)
+    (*it)->handleBegin(executor, op_seq, backend);
+}
+
+// Calls handleEnd(executor, op_seq, backend) on every registered observer, in list order.
+void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
+                                     const backend::Backend *backend)
+{
+  for (auto it = _observers.begin(); it != _observers.end(); ++it)
+    (*it)->handleEnd(executor, op_seq, backend);
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ExecutionObservee.h b/runtime/neurun/core/src/exec/ExecutionObservee.h
new file mode 100644
index 000000000..dafeef55b
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ExecutionObservee.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_EXECUTION_OBSERVEE_H__
+#define __NEURUN_EXEC_EXECUTION_OBSERVEE_H__
+
+#include <list>
+
+#include "exec/ExecutionObservers.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief Class that owns a list of execution observers and forwards model/job
+ *        begin and end notifications to each of them
+ */
+class ExecutionObservee
+{
+public:
+ /**
+ * @brief Register an observer
+ *
+ * @param observer Observer to be added
+ */
+ void add(std::unique_ptr<IExecutionObserver> observer);
+ /// @brief Notify all observers that execution of the model begins
+ void notifyModelBegin(IExecutor *executor);
+ /// @brief Notify all observers that execution of the model has ended
+ void notifyModelEnd(IExecutor *executor);
+ /// @brief Notify all observers that a job (op sequence on a backend) begins
+ void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+ const backend::Backend *backend);
+ /// @brief Notify all observers that a job (op sequence on a backend) has ended
+ void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
+ const backend::Backend *backend);
+
+private:
+ /// @brief Registered observers, in registration order
+ std::list<std::unique_ptr<IExecutionObserver>> _observers;
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_EXECUTION_OBSERVEE_H__
diff --git a/runtime/neurun/core/src/exec/ExecutionObservers.cc b/runtime/neurun/core/src/exec/ExecutionObservers.cc
new file mode 100644
index 000000000..071a9e228
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ExecutionObservers.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/ExecutionObservers.h"
+
+#include <string>
+
+#include "util/logging.h"
+#include "ir/operation/Permute.h"
+#include "exec/IExecutor.h"
+#include "misc/polymorphic_downcast.h"
+#include "ir/OpSequence.h"
+
+namespace neurun
+{
+
+namespace exec
+{
+
+// Fetches the backend's timer and starts it. Throws std::runtime_error when the backend
+// config returns no timer.
+void ProfileObserver::handleBegin(neurun::exec::IExecutor *, const ir::OpSequence *,
+                                  const neurun::backend::Backend *backend)
+{
+  _timer = backend->config()->timer();
+  if (_timer != nullptr)
+  {
+    _timer->handleBegin();
+    return;
+  }
+  throw std::runtime_error("To profile backend timer() method must be implemented");
+}
+
+/**
+ * @brief Stop the timer started in handleBegin() and record the measured time
+ *
+ * The time is stored through _et together with the quantization flag and the summed
+ * total_size() of the node's input and output operands. A node named "Permute" is
+ * recorded by its input/output backend pair, any other node by backend and node name.
+ *
+ * @param exec    Executor whose graph owns the operands
+ * @param op_seq  Op sequence that just ran; only its first operation is profiled
+ * @param backend Backend the op sequence ran on
+ */
+void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
+                                const backend::Backend *backend)
+{
+  _timer->handleEnd();
+  const auto timer_res = _timer->getTime();
+
+  // NOTE This assumes there is just one operation in a op_seq
+  auto node = op_seq->operations().at(0).node;
+  auto node_name = node->name();
+  VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl;
+
+  // fill ExecTime:
+  // The type of the first input decides whether the operation counts as quantized
+  bool is_quantized = exec->graph().operands().at(node->getInputs().at(0)).typeInfo().type() ==
+                      ir::DataType::QUANT8_ASYMM;
+
+  uint32_t size = 0;
+  for (const auto &input : node->getInputs())
+  {
+    size += exec->graph().operands().at(input).info().total_size();
+  }
+  for (const auto &output : node->getOutputs())
+  {
+    size += exec->graph().operands().at(output).info().total_size();
+  }
+  if (node_name == "Permute")
+  {
+    auto *permute_node = nnfw::misc::polymorphic_downcast<const ir::operation::Permute *>(node);
+    assert(permute_node != nullptr);
+    _et->updatePermuteTime(permute_node->param().input_backend_ctx->backend,
+                           permute_node->param().output_backend_ctx->backend, is_quantized, size,
+                           timer_res);
+  }
+  else
+  {
+    _et->updateOperationExecTime(backend, node_name, is_quantized, size, timer_res);
+  }
+}
+
+// Opens `filepath` for output and wires the event collector to the recorder; the
+// recorder is written to that stream in the destructor.
+ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath)
+ : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder}
+{
+}
+
+// Writes the recorder's contents to the output file stream when the observer is destroyed.
+ChromeTracingObserver::~ChromeTracingObserver() { _recorder.writeToFile(_ofs); }
+
+// Model begin: emits a BEGIN event tagged "runtime" / "Graph".
+void ChromeTracingObserver::handleBegin(IExecutor *)
+{
+ _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
+}
+
+// Job begin: emits a BEGIN event tagged with the backend id and the op sequence's tag.
+void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq,
+ const backend::Backend *backend)
+{
+ std::string backend_id = backend->config()->id();
+ _collector.onEvent(
+ EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, subgraphTag(op_seq)});
+}
+
+// Job end: emits an END event with the same backend id and tag as the matching BEGIN.
+void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq,
+ const backend::Backend *backend)
+{
+ std::string backend_id = backend->config()->id();
+ _collector.onEvent(
+ EventCollector::Event{EventCollector::Edge::END, backend_id, subgraphTag(op_seq)});
+}
+
+// Model end: emits an END event tagged "runtime" / "Graph".
+void ChromeTracingObserver::handleEnd(IExecutor *)
+{
+ _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
+}
+
+// Builds the event label for an op sequence: "$<index> <name>" of its first operation,
+// followed by " (+N)" when the sequence holds N further operations.
+std::string ChromeTracingObserver::subgraphTag(const ir::OpSequence *op_seq)
+{
+  const auto num_ops = op_seq->size();
+  if (num_ops == 0)
+    return "Empty OpSequence";
+
+  auto head = op_seq->operations().at(0);
+  std::string label = "$" + std::to_string(head.index.value());
+  label += " " + head.node->name();
+  if (num_ops > 1)
+    label += " (+" + std::to_string(num_ops - 1) + ")";
+  return label;
+}
+
+} // namespace exec
+
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ExecutorBase.cc b/runtime/neurun/core/src/exec/ExecutorBase.cc
new file mode 100644
index 000000000..9692c2ba7
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ExecutorBase.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutorBase.h"
+#include "util/logging.h"
+namespace neurun
+{
+namespace exec
+{
+
+// Stores a reference to the graph, shares the operand context and takes ownership of the
+// tensor manager set. No other work is done here.
+ExecutorBase::ExecutorBase(const ir::Graph &graph,
+ const std::shared_ptr<compiler::OperandContext> &operand_context,
+ std::unique_ptr<backend::TensorManagerSet> tensor_mgrs)
+ : _graph{graph}, _operand_context{operand_context}, _tensor_mgrs{std::move(tensor_mgrs)},
+ _mutex()
+{
+ // DO NOTHING
+}
+
+// Selects the element type for the templated source<T>() from the given data type.
+// Throws std::runtime_error for any data type not listed below.
+std::unique_ptr<ISource> ExecutorBase::source(const ir::IOIndex &index, const ir::TypeInfo &type,
+                                              const void *buffer, size_t length,
+                                              ir::Layout io_layout)
+{
+  using ir::DataType;
+  const auto data_type = type.type();
+  switch (data_type)
+  {
+    // These three are all read as uint8_t elements
+    case DataType::UINT8:
+    case DataType::BOOL8:
+    case DataType::QUANT8_ASYMM:
+      return source<uint8_t>(index, buffer, length, io_layout);
+    case DataType::QUANT8_SYMM:
+      return source<int8_t>(index, buffer, length, io_layout);
+    case DataType::UINT32:
+      return source<uint32_t>(index, buffer, length, io_layout);
+    case DataType::INT32:
+      return source<int32_t>(index, buffer, length, io_layout);
+    case DataType::FLOAT32:
+      return source<float>(index, buffer, length, io_layout);
+    default:
+      break;
+  }
+  throw std::runtime_error("Not supported yet");
+}
+
+// Selects the element type for the templated sink<T>() from the given data type.
+// Throws std::runtime_error for any data type not listed below.
+std::unique_ptr<ISink> ExecutorBase::sink(const ir::IOIndex &index, const ir::TypeInfo &type,
+                                          void *buffer, size_t length, ir::Layout io_layout)
+{
+  using ir::DataType;
+  const auto data_type = type.type();
+  switch (data_type)
+  {
+    // These three are all written as uint8_t elements
+    case DataType::UINT8:
+    case DataType::BOOL8:
+    case DataType::QUANT8_ASYMM:
+      return sink<uint8_t>(index, buffer, length, io_layout);
+    case DataType::QUANT8_SYMM:
+      return sink<int8_t>(index, buffer, length, io_layout);
+    case DataType::UINT32:
+      return sink<uint32_t>(index, buffer, length, io_layout);
+    case DataType::INT32:
+      return sink<int32_t>(index, buffer, length, io_layout);
+    case DataType::FLOAT32:
+      return sink<float>(index, buffer, length, io_layout);
+    default:
+      break;
+  }
+  throw std::runtime_error("Not supported yet");
+}
+
+/**
+ * @brief Run one inference: push user inputs, call executeImpl(), pull user outputs
+ *
+ * The whole call holds #_mutex, so calls on the same executor are serialized.
+ *
+ * @param desc Input/output descriptors; a null entry is skipped
+ */
+void ExecutorBase::execute(const IODescription &desc)
+{
+ // For thread-safe, use mutex
+ // TODO: if all used backends on this executor are thread-safe,
+ // do not need to use mutex (otherwise, use mutex)
+ std::lock_guard<std::mutex> lock(_mutex);
+
+ // One (initially null) slot per graph input / output
+ std::vector<std::unique_ptr<ISource>> sources{_graph.getInputs().size()};
+ std::vector<std::unique_ptr<ISink>> sinks{_graph.getOutputs().size()};
+
+ // Set input(s)
+ for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
+ {
+ ir::IOIndex input_index{n};
+ ir::OperandIndex index{_graph.getInputs().at(input_index)};
+
+ if (desc.inputs.at(n) == nullptr)
+ {
+ // Optional input
+ continue;
+ }
+
+ const auto operand_li = _graph.getLowerInfo()->operand.at(index).get();
+ if (operand_li->def_factors().empty())
+ {
+ // This input is not used (i.e. constant, EX. reshape's axis)
+ continue;
+ }
+
+ const auto &input = *desc.inputs.at(n);
+ sources.at(n) =
+ source(input_index, input.info.typeInfo(), input.buffer, input.size, input.layout);
+
+ // The source is handed the operand's tensor through the tensor object's access() callback
+ auto setter = [&](::neurun::backend::operand::ITensor &tensor) { sources.at(n)->push(tensor); };
+
+ auto object = _operand_context->at(index);
+
+ object->access(setter);
+ }
+
+ executeImpl();
+
+ // Get output(s)
+ for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
+ {
+ ir::IOIndex output_index{n};
+ // Optional output
+ if (desc.outputs.at(n) == nullptr)
+ {
+ continue;
+ }
+ const auto &output = *desc.outputs.at(n);
+ sinks.at(n) =
+ sink(output_index, output.info.typeInfo(), output.buffer, output.size, output.layout);
+
+ // The sink is handed the operand's tensor through the tensor object's access() callback
+ auto getter = [&](::neurun::backend::operand::ITensor &tensor) { sinks.at(n)->pull(tensor); };
+
+ ir::OperandIndex index{_graph.getOutputs().at(output_index)};
+ auto object = _operand_context->at(index);
+
+ object->access(getter);
+ }
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ExecutorBase.h b/runtime/neurun/core/src/exec/ExecutorBase.h
new file mode 100644
index 000000000..a93e036a5
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ExecutorBase.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_EXECUTOR_BASE_H__
+#define __NEURUN_EXEC_EXECUTOR_BASE_H__
+
+#include <mutex>
+
+#include "Source.h"
+#include "exec/ExecutionObservers.h"
+#include "Sink.h"
+#include "exec/IExecutor.h"
+#include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "backend/IConfig.h"
+#include "backend/Backend.h"
+#include "compiler/OperandContext.h"
+#include "backend/ExecTime.h"
+#include "exec/IFunction.h"
+#include "backend/ITensorManager.h"
+#include "exec/ExecutionObservee.h"
+#include <list>
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief Common base of the executors
+ *
+ * execute() moves user I/O buffers to and from the operand tensors around a call to
+ * executeImpl(), which each concrete executor provides.
+ */
+class ExecutorBase : public IExecutor
+{
+public:
+  /**
+   * @brief Construct a new ExecutorBase object
+   *
+   * @param graph           Graph object (kept by reference)
+   * @param operand_context Operand context used to reach the operand tensors
+   * @param tensor_mgrs     Tensor manager set; ownership is taken
+   */
+  ExecutorBase(const ir::Graph &graph,
+               const std::shared_ptr<compiler::OperandContext> &operand_context,
+               std::unique_ptr<backend::TensorManagerSet> tensor_mgrs);
+
+  virtual ~ExecutorBase() = default;
+
+  const ir::Graph &graph() final { return _graph; }
+
+  void execute(const IODescription &desc) final;
+
+  // Used only in Dataflow and Parallel Executors
+  void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
+  {
+    _indexed_ranks = std::move(ranks);
+  }
+
+  /// @brief Run the compiled graph; called by execute() between input and output transfer
+  virtual void executeImpl(void) = 0;
+
+  /// @brief Register an execution observer; ownership is taken
+  void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); }
+
+private:
+  /// @brief Create a source for input @c index, choosing the element type from @c type
+  std::unique_ptr<ISource> source(const ir::IOIndex &index, const ir::TypeInfo &type,
+                                  const void *buffer, size_t length, ir::Layout io_layout);
+  /// @brief Create a sink for output @c index, choosing the element type from @c type
+  std::unique_ptr<ISink> sink(const ir::IOIndex &index, const ir::TypeInfo &type, void *buffer,
+                              size_t length, ir::Layout io_layout);
+
+  /**
+   * @brief Create a source with element type T for input @c index
+   *
+   * Returns a PermutateSource when the user layout and the tensor layout are NHWC/NCHW
+   * in opposite order, otherwise a CopySource.
+   */
+  template <typename T>
+  std::unique_ptr<ISource> source(const ir::IOIndex &index, const void *buffer, size_t length,
+                                  ir::Layout io_layout)
+  {
+    const auto operand_index = _graph.getInputs().at(index);
+    const auto &operand = _graph.operands().at(operand_index);
+
+    const auto tensor = _operand_context->at(operand_index);
+    const auto tensor_layout = tensor->layout();
+
+    if (((io_layout == ir::Layout::NHWC) && (tensor_layout == ir::Layout::NCHW)) ||
+        ((io_layout == ir::Layout::NCHW) && (tensor_layout == ir::Layout::NHWC)))
+    {
+      return nnfw::cpp14::make_unique<PermutateSource<T>>(buffer, length, operand.shape(),
+                                                          io_layout);
+    }
+    // TODO Change this to return error
+    // An UNKNOWN user layout is only accepted when the tensor is neither NCHW nor NHWC
+    assert(io_layout != ir::Layout::UNKNOWN ||
+           (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NHWC));
+
+    return nnfw::cpp14::make_unique<CopySource<T>>(buffer, length, operand.shape());
+  }
+
+  /**
+   * @brief Create a sink with element type T for output @c index
+   *
+   * Returns a PermutateSink when the tensor layout and the user layout are NHWC/NCHW
+   * in opposite order, otherwise a CopySink.
+   */
+  template <typename T>
+  std::unique_ptr<ISink> sink(const ir::IOIndex &index, void *buffer, size_t length,
+                              ir::Layout io_layout)
+  {
+    const auto operand_index = _graph.getOutputs().at(index);
+    const auto &operand = _graph.operands().at(operand_index);
+    const auto tensor = _operand_context->at(operand_index);
+    const auto tensor_layout = tensor->layout();
+
+    if (((tensor_layout == ir::Layout::NCHW) && (io_layout == ir::Layout::NHWC)) ||
+        ((tensor_layout == ir::Layout::NHWC) && (io_layout == ir::Layout::NCHW)))
+    {
+      return nnfw::cpp14::make_unique<PermutateSink<T>>(buffer, length, operand.shape(), io_layout);
+    }
+    // TODO Change this to return error
+    // An UNKNOWN user layout is only accepted when the tensor is neither NCHW nor NHWC
+    assert(io_layout != ir::Layout::UNKNOWN ||
+           (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NHWC));
+
+    return nnfw::cpp14::make_unique<CopySink<T>>(buffer, length, operand.shape());
+  }
+
+protected:
+  /// @brief Observers notified by the concrete executors
+  ExecutionObservee _subject;
+  /// @brief Operation ranks set through setIndexedRanks(); null until then
+  std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+  const ir::Graph &_graph;
+  std::shared_ptr<compiler::OperandContext> _operand_context;
+  std::unique_ptr<backend::TensorManagerSet> _tensor_mgrs;
+  /// @brief Held for the whole of execute()
+  std::mutex _mutex;
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_EXECUTOR_BASE_H__
diff --git a/runtime/neurun/core/src/exec/FunctionSequence.cc b/runtime/neurun/core/src/exec/FunctionSequence.cc
new file mode 100644
index 000000000..00214fcfa
--- /dev/null
+++ b/runtime/neurun/core/src/exec/FunctionSequence.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FunctionSequence.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+// Calls run() on every appended function, in the order they were appended.
+void FunctionSequence::run()
+{
+  for (auto it = _functions.begin(); it != _functions.end(); ++it)
+  {
+    (*it)->run();
+  }
+}
+
+// Calls runSync() on every appended function, in the order they were appended.
+void FunctionSequence::runSync()
+{
+  for (auto it = _functions.begin(); it != _functions.end(); ++it)
+  {
+    (*it)->runSync();
+  }
+}
+
+// Calls prepare() on every appended function, in the order they were appended.
+void FunctionSequence::prepare()
+{
+  for (auto it = _functions.begin(); it != _functions.end(); ++it)
+  {
+    (*it)->prepare();
+  }
+}
+
+// Takes ownership of `function` and stores it at the end of the sequence.
+void FunctionSequence::append(std::unique_ptr<IFunction> &&function)
+{
+  _functions.emplace_back(std::move(function));
+}
+
+// Invokes `fn` once per stored function, in insertion order, passing the function by reference.
+void FunctionSequence::iterate(const std::function<void(IFunction &)> &fn)
+{
+  for (auto it = _functions.begin(); it != _functions.end(); ++it)
+  {
+    IFunction &current = **it;
+    fn(current);
+  }
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/FunctionSequence.h b/runtime/neurun/core/src/exec/FunctionSequence.h
new file mode 100644
index 000000000..2ba5c0b08
--- /dev/null
+++ b/runtime/neurun/core/src/exec/FunctionSequence.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_FUNCTION_SEQUENCE_H__
+#define __NEURUN_EXEC_FUNCTION_SEQUENCE_H__
+
+#include <memory>
+#include <vector>
+#include <functional>
+
+#include "exec/IFunction.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief IFunction that owns an ordered list of IFunction objects and forwards each call to
+ * every one of them, in the order they were appended
+ */
+class FunctionSequence : public IFunction
+{
+public:
+ virtual ~FunctionSequence() = default;
+
+ /**
+ * @brief Call run() on every appended function, in append order
+ */
+ void run() override;
+ /**
+ * @brief Call runSync() on every appended function, in append order
+ */
+ void runSync() override;
+ /**
+ * @brief Call prepare() on every appended function, in append order
+ */
+ void prepare() override;
+
+ /**
+ * @brief Appends an IFunction object to the function sequence
+ *
+ * @param function IFunction object to be appended
+ */
+ void append(std::unique_ptr<IFunction> &&function);
+
+ /**
+ * @brief Invoke a callback on every appended function, in append order
+ *
+ * @param fn Callback that receives each stored function by reference
+ */
+ void iterate(const std::function<void(IFunction &)> &fn);
+
+private:
+ // Owned functions, kept in append order
+ std::vector<std::unique_ptr<IFunction>> _functions;
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_FUNCTION_SEQUENCE_H__
diff --git a/runtime/neurun/core/src/exec/Job.cc b/runtime/neurun/core/src/exec/Job.cc
new file mode 100644
index 000000000..ba02daf30
--- /dev/null
+++ b/runtime/neurun/core/src/exec/Job.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Job.h"
+
+#include <cassert>
+
+#include "util/logging.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+// Stores the job index and the function pointer as given.
+Job::Job(uint32_t index, IFunction *fn)
+    : _index{index}, _fn{fn}
+{
+}
+
+// Runs the function this job was constructed with.
+void Job::run()
+{
+  _fn->run();
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/Job.h b/runtime/neurun/core/src/exec/Job.h
new file mode 100644
index 000000000..1516b9281
--- /dev/null
+++ b/runtime/neurun/core/src/exec/Job.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_JOB_H__
+#define __NEURUN_EXEC_JOB_H__
+
+#include <unordered_set>
+
+#include "exec/IFunction.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/Backend.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief Pairs a job index with the function to execute for it
+ *
+ * The function is held as a raw pointer; this class declares no destructor that releases it.
+ */
+class Job
+{
+public:
+ /**
+ * @brief Constructs a Job object
+ *
+ * @param index Operation index for this job
+ * @param fn compiled code to run this job
+ */
+ Job(uint32_t index, IFunction *fn);
+ /**
+ * @brief Execute the compiled code
+ */
+ void run();
+ /**
+ * @brief Return job index
+ *
+ * @return Job index
+ */
+ uint32_t index() const { return _index; }
+ /**
+ * @brief Return the function to be executed
+ *
+ * @return Pointer of the function
+ */
+ IFunction *fn() { return _fn; }
+
+private:
+ // Index passed at construction
+ uint32_t _index;
+ // Function passed at construction
+ IFunction *_fn;
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_JOB_H__
diff --git a/runtime/neurun/core/src/exec/LinearExecutor.cc b/runtime/neurun/core/src/exec/LinearExecutor.cc
new file mode 100644
index 000000000..d41dba880
--- /dev/null
+++ b/runtime/neurun/core/src/exec/LinearExecutor.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LinearExecutor.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+// Runs every stored code entry in order. Observers are notified at model begin/end and
+// immediately before and after each entry's function runs.
+void LinearExecutor::executeImpl()
+{
+  _subject.notifyModelBegin(this);
+
+  for (auto it = _code.begin(); it != _code.end(); ++it)
+  {
+    auto &entry = *it;
+    const auto target_op_seq = entry.elem.op_seq;
+    const auto target_backend = entry.elem.lower_info->backend();
+
+    _subject.notifyJobBegin(this, target_op_seq, target_backend);
+    entry.fn->run();
+    _subject.notifyJobEnd(this, target_op_seq, target_backend);
+  }
+
+  _subject.notifyModelEnd(this);
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/LinearExecutor.h b/runtime/neurun/core/src/exec/LinearExecutor.h
new file mode 100644
index 000000000..baf063a12
--- /dev/null
+++ b/runtime/neurun/core/src/exec/LinearExecutor.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file LinearExecutor.h
+ * @brief This file contains LinearExecutor class to define and run execution phase
+ */
+
+#ifndef __NEURUN_EXEC_EXECUTOR_H_
+#define __NEURUN_EXEC_EXECUTOR_H_
+
+#include "ExecutorBase.h"
+#include "compiler/Linear.h"
+#include "exec/FunctionSequence.h"
+#include "compiler/CodeWithInfo.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief Class to handle the execution phase. Simply runs the stored sequence of compiled code
+ * entries one after another, in the order they were given (expected to be sorted in
+ * topological order)
+ */
+class LinearExecutor final : public ExecutorBase
+{
+public:
+ /**
+ * @brief Construct a new LinearExecutor object
+ * @param[in] graph Graph object, forwarded to ExecutorBase
+ * @param[in] operand_context Operand context, forwarded to ExecutorBase
+ * @param[in] tensor_mgrs Tensor manager set, moved into ExecutorBase
+ * @param[in] code Compiled code entries to run; moved into this executor
+ */
+ LinearExecutor(const ir::Graph &graph,
+ const std::shared_ptr<compiler::OperandContext> &operand_context,
+ std::unique_ptr<backend::TensorManagerSet> tensor_mgrs,
+ std::vector<compiler::CodeWithInfo> &&code)
+ : ExecutorBase{graph, operand_context, std::move(tensor_mgrs)}, _code{std::move(code)}
+ {
+ }
+
+public:
+ /**
+ * @brief Run every stored code entry in order, notifying observers around each one
+ */
+ void executeImpl(void) override;
+
+private:
+ // Code entries executed by executeImpl(), in stored order
+ std::vector<compiler::CodeWithInfo> _code;
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_EXECUTOR_H_
diff --git a/runtime/neurun/core/src/exec/ParallelExecutor.cc b/runtime/neurun/core/src/exec/ParallelExecutor.cc
new file mode 100644
index 000000000..c73c353d3
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ParallelExecutor.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ParallelExecutor.h"
+
+#include <cassert>
+#include <stdexcept>
+
+#include "util/logging.h"
+#include "exec/IFunction.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief IFunction wrapper that calls a setup callback before, and a teardown callback after,
+ * running the wrapped function
+ *
+ * The wrapped function is held as a raw pointer and is not released by this class; the two
+ * callbacks are stored by copy.
+ */
+class HookFunction : public IFunction
+{
+public:
+  /**
+   * @param fn       Function to wrap
+   * @param setup    Callback invoked right before fn->run()
+   * @param teardown Callback invoked right after fn->run() returns
+   */
+  HookFunction(IFunction *fn, const std::function<void()> &setup,
+               const std::function<void()> &teardown)
+      : _fn{fn}, _setup{setup}, _teardown{teardown}
+  {
+  }
+
+public:
+  // Note: teardown is skipped if setup or the wrapped run() throws
+  void run() override
+  {
+    _setup();
+    _fn->run();
+    _teardown();
+  }
+  // Not supported here. Throws a std::exception-derived type (instead of a bare string literal)
+  // so generic handlers can catch it and report the message.
+  void runSync() override
+  {
+    throw std::runtime_error("runSync is needed just for profiling in Dataflow executor");
+  }
+
+private:
+  IFunction *_fn;
+  std::function<void()> _setup;
+  std::function<void()> _teardown;
+};
+
+// Runs the base-class bookkeeping for a finished job while holding the job mutex, then wakes
+// every thread waiting on the job condition variable once the mutex has been released.
+void ParallelExecutor::notify(uint32_t finished_job_id)
+{
+  {
+    std::lock_guard<std::mutex> guard{_mu_jobs};
+    DataflowExecutor::notify(finished_job_id);
+  }
+
+  _cv_jobs.notify_all();
+}
+
+// Forwards all arguments to DataflowExecutor (tensor_mgrs and code_map are moved) and logs the
+// construction. The scheduler is not created here; executeImpl() creates it.
+ParallelExecutor::ParallelExecutor(const ir::Graph &graph,
+ const std::shared_ptr<compiler::OperandContext> &operand_context,
+ std::unique_ptr<backend::TensorManagerSet> tensor_mgrs,
+ CodeMap &&code_map)
+ : DataflowExecutor{graph, operand_context, std::move(tensor_mgrs), std::move(code_map)}
+{
+ VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
+}
+
+// Executes the graph by handing ready jobs to a per-backend scheduler.
+//
+// Steps: build a fresh scheduler for the backends found in the lower info, move the finished
+// jobs back into the waiting set, seed the ready set with jobs whose input count is zero, then
+// loop: take the first ready job, wrap it with begin/end hooks and assign it to its backend.
+// The loop ends when there is neither a ready job nor a waiting job. Finally the scheduler is
+// drained and the input counters are restored for the next execution.
+void ParallelExecutor::executeImpl()
+{
+ // Init scheduler
+ // TODO Consider to have distinct backend set in LowerInfoMap
+ ir::BackendSet backends;
+ for (auto &itr : _graph.getLowerInfo()->operation)
+ {
+ backends.add(itr.second->backend());
+ }
+ // A new scheduler is created on every call
+ _scheduler = nnfw::cpp14::make_unique<ParallelScheduler>(backends);
+
+ assert(noWaitingJobs());
+
+ // Execution setup
+ _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs
+
+ // Seed the ready set: a job with an input count of zero can start immediately
+ for (uint32_t i = 0; i < _waiting_jobs.size(); ++i)
+ {
+ VERBOSE(ParallelExecutor) << i << ": " << _input_info[i] << std::endl;
+ if (_input_info[i] == 0)
+ {
+ emplaceToReadyJobs(i);
+ }
+ }
+ assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
+
+ VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl;
+
+ _subject.notifyModelBegin(this);
+ while (true)
+ {
+ // _mu_jobs is held while inspecting and popping from _ready_jobs; notify() takes the same
+ // mutex before running the base-class bookkeeping
+ std::unique_lock<std::mutex> lock{_mu_jobs};
+
+ if (_ready_jobs.empty())
+ {
+ // Sleep until notify() signals and either a job became ready or nothing is waiting
+ _cv_jobs.wait(lock, [this] { return !_ready_jobs.empty() || noWaitingJobs(); });
+ // Check finish condition
+ if (_ready_jobs.empty() && noWaitingJobs())
+ {
+ break;
+ }
+ }
+
+ // Take the first entry of the ready set
+ auto job = std::move(_ready_jobs.begin()->second);
+ _ready_jobs.erase(_ready_jobs.begin());
+
+ lock.unlock();
+
+ VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl;
+
+ auto job_index = job->index();
+ auto subgraph_index = _job_to_op_seq[job_index];
+ auto op_seq = &_graph.subgraphs()->at(subgraph_index);
+ auto backend = _graph.getLowerInfo()->operation.at(subgraph_index)->backend();
+ // Per-iteration values are captured by copy; members are reached through `this` via the
+ // default by-reference capture
+ auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); };
+ auto teardown = [&, job_index, op_seq, backend]() {
+ _subject.notifyJobEnd(this, op_seq, backend);
+ notify(job_index);
+ };
+
+ // The hook only borrows job->fn(); the Job itself is kept alive in _finished_jobs below
+ _scheduler->assign(nnfw::cpp14::make_unique<HookFunction>(job->fn(), setup, teardown), backend);
+ // NOTE(review): this write happens after _mu_jobs was released - confirm nothing else
+ // touches _finished_jobs concurrently
+ _finished_jobs[job_index] = std::move(job);
+ }
+
+ assert(noWaitingJobs());
+
+ // Wait for all the jobs done
+ _scheduler->finish();
+ _subject.notifyModelEnd(this);
+
+ // Reset input info for the next execution
+ _input_info = _initial_input_info;
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ParallelExecutor.h b/runtime/neurun/core/src/exec/ParallelExecutor.h
new file mode 100644
index 000000000..54377fd9e
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ParallelExecutor.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_PARALLEL_EXECUTOR_H__
+#define __NEURUN_EXEC_PARALLEL_EXECUTOR_H__
+
+#include <list>
+#include <queue>
+#include <unordered_map>
+
+#include "FunctionSequence.h"
+#include "Job.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/Index.h"
+#include "cpp14/memory.h"
+#include "exec/DataflowExecutor.h"
+#include "ParallelScheduler.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief Class to execute Graph in parallel
+ *
+ * Extends DataflowExecutor: jobs that become ready are assigned to a ParallelScheduler, and
+ * job completion is synchronized through a mutex and a condition variable.
+ */
+class ParallelExecutor : public DataflowExecutor
+{
+protected:
+ /**
+ * @brief Run the DataflowExecutor bookkeeping for a finished job under the job mutex, then
+ * wake all threads waiting on the job condition variable
+ *
+ * @param finished_job_id Index of the job that has finished
+ */
+ void notify(uint32_t finished_job_id) override;
+
+public:
+ /**
+ * @brief Constructs a ParallelExecutor object
+ *
+ * @param graph Graph object
+ * @param operand_context (Only for input/output operand data access)
+ * @param tensor_mgrs Tensor manager set, moved into the base executor
+ * @param code_map Compiled code map
+ */
+ ParallelExecutor(const ir::Graph &graph,
+ const std::shared_ptr<compiler::OperandContext> &operand_context,
+ std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map);
+
+ /**
+ * @brief Execute all jobs through the scheduler and return once the scheduler has finished
+ */
+ void executeImpl() override;
+
+private:
+ // Signalled by notify(); waited on by executeImpl() when no job is ready
+ std::condition_variable _cv_jobs;
+ // Mutex locked by notify() and by executeImpl() around its ready-job handling
+ std::mutex _mu_jobs;
+ // Created anew at the start of every executeImpl() call
+ std::unique_ptr<ParallelScheduler> _scheduler;
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_PARALLEL_EXECUTOR_H__
diff --git a/runtime/neurun/core/src/exec/ParallelScheduler.cc b/runtime/neurun/core/src/exec/ParallelScheduler.cc
new file mode 100644
index 000000000..5f9e9e013
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ParallelScheduler.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ParallelScheduler.h"
+
+#include <cassert>
+
+#include "cpp14/memory.h"
+#include "util/logging.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+// Creates one default-constructed ThreadPool for every backend in the given set.
+ParallelScheduler::ParallelScheduler(const ir::BackendSet &backends)
+{
+  assert(!backends.empty());
+
+  for (auto target : backends)
+  {
+    auto pool = nnfw::cpp14::make_unique<ThreadPool>();
+    _thread_pools[target] = std::move(pool);
+  }
+}
+
+// Queues `fn` on the thread pool registered for `backend`.
+// `at()` throws std::out_of_range when no pool was registered for that backend.
+void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend)
+{
+  assert(!_thread_pools.empty());
+
+  auto &pool = _thread_pools.at(backend);
+  pool->enqueue(std::move(fn));
+}
+
+// Calls finish() on every thread pool, one after another.
+void ParallelScheduler::finish()
+{
+  for (auto it = _thread_pools.begin(); it != _thread_pools.end(); ++it)
+  {
+    it->second->finish();
+  }
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ParallelScheduler.h b/runtime/neurun/core/src/exec/ParallelScheduler.h
new file mode 100644
index 000000000..af1103750
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ParallelScheduler.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_PARALLEL_SCHEDULER_H__
+#define __NEURUN_EXEC_PARALLEL_SCHEDULER_H__
+
+#include <unordered_map>
+#include <memory>
+
+#include "exec/IFunction.h"
+#include "ir/BackendSet.h"
+#include "ThreadPool.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief Dispatches functions to per-backend thread pools
+ */
+class ParallelScheduler
+{
+public:
+ /**
+ * @brief Constructs ParallelScheduler object
+ *
+ * @param backends Backend set
+ */
+ ParallelScheduler(const ir::BackendSet &backends);
+ /**
+ * @brief Assign a task to the given backend
+ *
+ * @param[in] fn Function to be assigned
+ * @param[in] backend Target backend
+ */
+ void assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend);
+ /**
+ * @brief Block until all jobs are finished
+ */
+ void finish();
+
+private:
+ // One thread pool per backend, keyed by backend pointer
+ std::unordered_map<const backend::Backend *, std::unique_ptr<ThreadPool>> _thread_pools;
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_PARALLEL_SCHEDULER_H__
diff --git a/runtime/neurun/core/src/exec/Sink.h b/runtime/neurun/core/src/exec/Sink.h
new file mode 100644
index 000000000..bb2a6c58a
--- /dev/null
+++ b/runtime/neurun/core/src/exec/Sink.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_SINK_H__
+#define __NEURUN_EXEC_SINK_H__
+
+#include <cassert>
+
+#include "cpp14/memory.h"
+#include "util/feature/nchw/Reader.h"
+#include "util/feature/nchw/View.h"
+#include "util/feature/nhwc/Reader.h"
+#include "util/feature/nhwc/View.h"
+#include "util/Utils.h"
+#include <misc/feature/IndexIterator.h>
+
+namespace neurun
+{
+namespace exec
+{
+// Interface of an object that pulls data out of a backend tensor.
+struct ISink
+{
+ virtual ~ISink() = default;
+
+ // Reads from `tensor`; where the data goes is defined by the implementing class.
+ virtual void pull(::neurun::backend::operand::ITensor &tensor) const = 0;
+};
+
+// Create second level inheritance: the first level is used as a reference type in use-case places
+// Typed sink base. Holds the destination buffer, its size, the operand shape, a copy flag and
+// the I/O layout, and implements the tensor -> buffer transfer (pullUnif) that the concrete
+// sinks call from pull().
+template <typename T> class ITemplSink : public ISink
+{
+public:
+ // output_buffer is reinterpreted as T*. output_size is used as the memcpy byte count in the
+ // whole-buffer copy paths. copy == true selects plain copying; copy == false selects the
+ // NCHW <-> NHWC permutation for rank-4 shapes.
+ ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
+ const bool copy, ir::Layout io_layout)
+ : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
+ _shape{shape}, _copy{copy}, _io_layout{io_layout}
+ {
+ }
+
+protected:
+ // Transfers the contents of `tensor` into the output buffer.
+ // Throws std::runtime_error("NYI") for ranks above 4 that miss the fast path, and
+ // std::runtime_error("Wrong Layout") when permuting with an I/O layout that is neither NHWC
+ // nor NCHW.
+ void pullUnif(neurun::backend::operand::ITensor &tensor) const
+ {
+ // Either this is a copying sink, or the I/O and tensor layouts are an NHWC/NCHW mismatch
+ assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
+ (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
+ _copy);
+ auto input_buffer = tensor.buffer();
+ auto rank = _shape.rank();
+
+ // Fast path: an unpadded tensor is copied in one memcpy when rank < 4, or rank < 5 for a
+ // copying sink (_copy converts to 0 or 1)
+ if (!tensor.has_padding() && rank < 4 + _copy)
+ {
+ memcpy(_output_buffer, input_buffer, _output_size);
+ return;
+ }
+
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ memcpy(_output_buffer, input_buffer, _output_size);
+ break;
+ }
+ case 2:
+ {
+ // Copy one innermost row at a time; the source position of each row comes from
+ // tensor.calcOffset()
+ const int32_t copy_len = _shape.dim(1);
+
+ for (auto i = 0; i < _shape.dim(0); ++i)
+ {
+ neurun::util::Coordinates coords{i, 0};
+ memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
+ copy_len * sizeof(T));
+ }
+ break;
+ }
+ case 3:
+ {
+ // Same row-wise copy as rank 2, with one more outer dimension
+ const int32_t dim1 = _shape.dim(1);
+ const int32_t dim2 = _shape.dim(2);
+
+ for (auto i = 0; i < _shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < _shape.dim(1); ++j)
+ {
+ neurun::util::Coordinates coords{i, j, 0};
+ memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
+ input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ if (_copy)
+ {
+ // Row-wise copy without changing the element order
+ const int32_t dim1 = _shape.dim(1);
+ const int32_t dim2 = _shape.dim(2);
+ const int32_t dim3 = _shape.dim(3);
+
+ for (auto i = 0; i < _shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < _shape.dim(1); ++j)
+ {
+ for (auto k = 0; k < _shape.dim(2); ++k)
+ {
+ neurun::util::Coordinates coords{i, j, k, 0};
+ memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
+ input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
+ }
+ }
+ }
+ }
+ else
+ {
+ // Element-wise layout permutation from the tensor into the output buffer
+ const auto shape = _shape.asFeature(_io_layout);
+
+ if (_io_layout == ir::Layout::NHWC)
+ {
+ // Tensor is read as NCHW, output is written as NHWC
+ const util::feature::nchw::Reader<T> from(&tensor);
+ util::feature::nhwc::View<T> into(shape, _output_buffer, _output_size);
+ ::nnfw::misc::feature::iterate(shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, row, col, ch) = value;
+ };
+ }
+ else if (_io_layout == ir::Layout::NCHW)
+ {
+ // Tensor is read as NHWC, output is written as NCHW
+ const util::feature::nhwc::Reader<T> from(&tensor);
+ util::feature::nchw::View<T> into(shape, _output_buffer, _output_size);
+ ::nnfw::misc::feature::iterate(shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, row, col, ch);
+ into.at(batch, ch, row, col) = value;
+ };
+ }
+ else
+ {
+ throw std::runtime_error("Wrong Layout");
+ }
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI");
+ break;
+ }
+ }
+
+private:
+ // Destination buffer, viewed as elements of T
+ T *_output_buffer;
+ // Size passed to memcpy in the whole-buffer copy paths
+ const size_t _output_size;
+ // Operand shape that drives the per-rank copy loops
+ const ir::Shape _shape;
+ // true: plain copy, false: layout permutation for rank 4
+ const bool _copy;
+ // Layout of the data written to the output buffer
+ const ir::Layout _io_layout;
+};
+
+// Sink that constructs its base with the copy flag cleared, so rank-4 data takes the layout
+// permutation path of pullUnif().
+template <typename T> class PermutateSink final : public ITemplSink<T>
+{
+public:
+  PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
+                ir::Layout io_layout)
+      : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout)
+  {
+  }
+
+  // Delegates to the shared transfer routine of the base class
+  void pull(neurun::backend::operand::ITensor &tensor) const override { this->pullUnif(tensor); }
+};
+
+// Only supports NHWC format front-end(NNAPI) now
+// Sink that constructs its base with the copy flag set, so data is copied without changing the
+// element order.
+template <typename T> class CopySink final : public ITemplSink<T>
+{
+public:
+  CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
+           ir::Layout io_layout = ir::Layout::UNKNOWN)
+      : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout)
+  {
+  }
+
+  // Delegates to the shared transfer routine of the base class
+  void pull(neurun::backend::operand::ITensor &tensor) const override { this->pullUnif(tensor); }
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_SINK_H__
diff --git a/runtime/neurun/core/src/exec/Source.h b/runtime/neurun/core/src/exec/Source.h
new file mode 100644
index 000000000..fd52dd546
--- /dev/null
+++ b/runtime/neurun/core/src/exec/Source.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_SOURCE_H__
+#define __NEURUN_EXEC_SOURCE_H__
+
+#include <cassert>
+
+#include "cpp14/memory.h"
+#include "util/feature/nchw/Reader.h"
+#include "util/feature/nchw/View.h"
+#include "util/feature/nhwc/Reader.h"
+#include "util/feature/nhwc/View.h"
+#include "util/Utils.h"
+#include <misc/feature/IndexIterator.h>
+#include <ir/Layout.h>
+#include "ir/Shape.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+// Interface of an object that pushes data into a backend tensor.
+struct ISource
+{
+ virtual ~ISource() = default;
+
+ // Writes into `tensor`; where the data comes from is defined by the implementing class.
+ virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;
+};
+
+// Create second level inheritance: the first level is used as a reference type in use-case places
+// Typed source base. Holds the input buffer, its size, the operand shape, a copy flag and the
+// I/O layout, and implements the buffer -> tensor transfer (pushUnif) that the concrete
+// sources call from push().
+template <typename T> class ITemplSource : public ISource
+{
+public:
+ // input_buffer is reinterpreted as const T*. input_size is used as the memcpy byte count in
+ // the whole-buffer copy paths. copy == true selects plain copying; copy == false selects the
+ // NCHW <-> NHWC permutation for rank-4 shapes.
+ ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
+ const bool copy, ir::Layout io_layout)
+ : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
+ _shape{shape}, _copy(copy), _io_layout{io_layout}
+ {
+ }
+
+ // Re-declares the pure virtual from ISource; concrete sources implement it
+ virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;
+
+protected:
+ // Transfers the contents of the input buffer into `tensor`.
+ // Throws std::runtime_error("NYI") for ranks above 4 that miss the fast path, and
+ // std::runtime_error("Wrong Layout") when permuting with an I/O layout that is neither NCHW
+ // nor NHWC.
+ void pushUnif(neurun::backend::operand::ITensor &tensor) const
+ {
+ // Either this is a copying source, or the I/O and tensor layouts are an NHWC/NCHW mismatch
+ assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
+ (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
+ _copy);
+ auto output_buffer = tensor.buffer();
+ auto rank = _shape.rank();
+
+ // Fast path: an unpadded tensor is filled in one memcpy when rank < 4, or rank < 5 for a
+ // copying source (_copy converts to 0 or 1)
+ if (!tensor.has_padding() && rank < 4 + _copy)
+ {
+ memcpy(output_buffer, _input_buffer, _input_size);
+ return;
+ }
+
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ memcpy(output_buffer, _input_buffer, _input_size);
+ break;
+ }
+ case 2:
+ {
+ // Copy one innermost row at a time; the destination position of each row comes from
+ // tensor.calcOffset()
+ const int32_t copy_len = _shape.dim(1);
+
+ for (auto i = 0; i < _shape.dim(0); ++i)
+ {
+ neurun::util::Coordinates coords{i, 0};
+ memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
+ copy_len * sizeof(T));
+ }
+ break;
+ }
+ case 3:
+ {
+ // Same row-wise copy as rank 2, with one more outer dimension
+ const int32_t dim1 = _shape.dim(1);
+ const int32_t dim2 = _shape.dim(2);
+
+ for (auto i = 0; i < _shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < _shape.dim(1); ++j)
+ {
+ neurun::util::Coordinates coords{i, j, 0};
+ memcpy(output_buffer + tensor.calcOffset(coords),
+ _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ if (_copy)
+ {
+ // Row-wise copy without changing the element order
+ const int32_t dim1 = _shape.dim(1);
+ const int32_t dim2 = _shape.dim(2);
+ const int32_t dim3 = _shape.dim(3);
+ for (auto i = 0; i < _shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < _shape.dim(1); ++j)
+ {
+ for (auto k = 0; k < _shape.dim(2); ++k)
+ {
+ neurun::util::Coordinates coords{i, j, k, 0};
+ memcpy(output_buffer + tensor.calcOffset(coords),
+ _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
+ dim3 * sizeof(T));
+ }
+ }
+ }
+ }
+ else
+ {
+ // Element-wise layout permutation from the input buffer into the tensor
+ const auto shape = _shape.asFeature(_io_layout);
+
+ if (_io_layout == ir::Layout::NCHW)
+ {
+ // Input is read as NCHW, tensor is written as NHWC
+ const util::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size);
+ util::feature::nhwc::View<T> into(&tensor);
+ ::nnfw::misc::feature::iterate(shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, row, col, ch) = value;
+ };
+ }
+ else if (_io_layout == ir::Layout::NHWC)
+ {
+ // Input is read as NHWC, tensor is written as NCHW
+ const util::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size);
+ util::feature::nchw::View<T> into(&tensor);
+ ::nnfw::misc::feature::iterate(shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, row, col, ch);
+ into.at(batch, ch, row, col) = value;
+ };
+ }
+ else
+ {
+ throw std::runtime_error("Wrong Layout");
+ }
+ }
+
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI");
+ break;
+ }
+ }
+
+private:
+ // Source buffer, viewed as elements of T
+ const T *_input_buffer;
+ // Size passed to memcpy in the whole-buffer copy paths
+ const size_t _input_size;
+ // Operand shape that drives the per-rank copy loops
+ const ir::Shape _shape;
+ // true: plain copy, false: layout permutation for rank 4
+ const bool _copy;
+ // Layout of the data held in the input buffer
+ const ir::Layout _io_layout;
+};
+
+// Source that constructs its base with the copy flag cleared, so rank-4 data takes the layout
+// permutation path of pushUnif().
+template <typename T> class PermutateSource final : public ITemplSource<T>
+{
+public:
+  PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
+                  ir::Layout io_layout)
+      : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout)
+  {
+  }
+
+  // Performs the NHWC_TO_NCHW or NCHW_TO_NHWC permutation through the base class routine
+  void push(neurun::backend::operand::ITensor &tensor) const override { this->pushUnif(tensor); }
+};
+
+// Source that constructs its base with the copy flag set, so data is copied without changing
+// the element order.
+template <typename T> class CopySource final : public ITemplSource<T>
+{
+public:
+  CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
+             ir::Layout io_layout = ir::Layout::UNKNOWN)
+      : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout)
+  {
+  }
+
+  // Delegates to the shared transfer routine of the base class
+  void push(neurun::backend::operand::ITensor &tensor) const override { this->pushUnif(tensor); }
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_SOURCE_H__
diff --git a/runtime/neurun/core/src/exec/ThreadPool.cc b/runtime/neurun/core/src/exec/ThreadPool.cc
new file mode 100644
index 000000000..d8c706e30
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ThreadPool.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ThreadPool.h"
+
+#include <cassert>
+
+namespace neurun
+{
+namespace exec
+{
+
+// Starts `num_threads` threads, each constructed with a reference to the shared work queue.
+ThreadPool::ThreadPool(uint32_t num_threads)
+{
+  assert(num_threads >= 1);
+
+  for (uint32_t spawned = 0; spawned != num_threads; ++spawned)
+  {
+    _threads.emplace_back(std::ref(_worker));
+  }
+}
+
+// Terminates the work queue and joins the threads, unless they were already joined and cleared.
+ThreadPool::~ThreadPool()
+{
+  if (_threads.empty())
+  {
+    return;
+  }
+
+  _worker.terminate();
+  join();
+}
+
+// Hands the function over to the work queue.
+void ThreadPool::enqueue(std::unique_ptr<IFunction> &&fn)
+{
+  _worker.enqueue(std::move(fn));
+}
+
+// Reports the number of jobs the work queue currently holds.
+uint32_t ThreadPool::numJobsInQueue()
+{
+  return _worker.numJobsInQueue();
+}
+
+// Joins every thread, then empties the thread list.
+void ThreadPool::join()
+{
+  for (auto it = _threads.begin(); it != _threads.end(); ++it)
+  {
+    it->join();
+  }
+  _threads.clear();
+}
+
+// Calls finish() on the work queue, then joins every thread and empties the thread list.
+void ThreadPool::finish()
+{
+ _worker.finish();
+ join();
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ThreadPool.h b/runtime/neurun/core/src/exec/ThreadPool.h
new file mode 100644
index 000000000..a1a027617
--- /dev/null
+++ b/runtime/neurun/core/src/exec/ThreadPool.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_THREAD_POOL_H__
+#define __NEURUN_EXEC_THREAD_POOL_H__
+
+#include <thread>
+#include <memory>
+#include <vector>
+
+#include "WorkQueue.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+class ThreadPool
+{
+public:
+ /**
+ * @brief Construct ThreadPool object and start its worker threads
+ *
+ * @param num_threads Number of threads (asserted to be at least 1)
+ */
+ ThreadPool(uint32_t num_threads = 1);
+ /**
+ * @brief Destroy ThreadPool object. If threads are still held, the work queue is terminated
+ * and the threads are joined
+ */
+ ~ThreadPool();
+ /**
+ * @brief Enqueue a function
+ *
+ * @param fn A function to be queued
+ */
+ void enqueue(std::unique_ptr<IFunction> &&fn);
+ /**
+ * @brief Get number of jobs in worker's queue
+ *
+ * @return Number of jobs
+ */
+ uint32_t numJobsInQueue();
+
+ /**
+ * @brief Block until all jobs are finished
+ * @note The worker threads are joined and removed from the pool by this call
+ */
+ void finish();
+
+private:
+ void join(); // Join every thread in _threads, then clear the list
+
+private:
+ WorkQueue _worker; // Job queue shared by all worker threads
+ std::vector<std::thread> _threads; // Worker threads, each one running _worker
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_THREAD_POOL_H__
diff --git a/runtime/neurun/core/src/exec/WorkQueue.cc b/runtime/neurun/core/src/exec/WorkQueue.cc
new file mode 100644
index 000000000..6712554ac
--- /dev/null
+++ b/runtime/neurun/core/src/exec/WorkQueue.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WorkQueue.h"
+
+#include <cassert>
+
+namespace neurun
+{
+namespace exec
+{
+
+WorkQueue::~WorkQueue()
+{
+  // Flag forced termination under the lock, then wake every waiting worker
+  {
+    std::lock_guard<std::mutex> guard(_mu);
+    _state = State::FORCE_FINISHING;
+  }
+  _cv.notify_all();
+}
+
+void WorkQueue::operator()()
+{
+  for (;;)
+  {
+    std::unique_ptr<IFunction> fn;
+
+    {
+      std::unique_lock<std::mutex> lock{_mu};
+      // Sleep until the queue is shutting down or, while ONLINE, a job is available
+      _cv.wait(lock, [this] {
+        const bool shutting_down =
+            (_state == State::FORCE_FINISHING) || (_state == State::FINISHING);
+        return shutting_down || (_state == State::ONLINE && !_functions.empty());
+      });
+
+      // Forced shutdown: leave right away, the queue is expected to be empty
+      if (_state == State::FORCE_FINISHING)
+      {
+        assert(_functions.empty() && "Terminating with unfinished jobs");
+        return;
+      }
+
+      // Graceful shutdown: leave only once the queue has been drained
+      if (_state == State::FINISHING && _functions.empty())
+      {
+        return;
+      }
+
+      // Otherwise there is a job to take
+      assert(((_state == State::FINISHING) || (_state == State::ONLINE)) && !_functions.empty());
+      fn = std::move(_functions.front());
+      _functions.pop();
+    }
+
+    // The lock is released here, so the job does not run under the queue mutex
+    assert(fn);
+    fn->run();
+  }
+}
+
+void WorkQueue::enqueue(std::unique_ptr<IFunction> &&fn)
+{
+  // Append the job under the lock, then wake a single worker for it
+  {
+    std::lock_guard<std::mutex> guard{_mu};
+    _functions.push(std::move(fn));
+  }
+  _cv.notify_one();
+}
+
+void WorkQueue::terminate()
+{
+  // Request an immediate stop: workers return as soon as they observe this state
+  {
+    std::lock_guard<std::mutex> guard{_mu};
+    _state = State::FORCE_FINISHING;
+  }
+  _cv.notify_all();
+}
+
+void WorkQueue::finish()
+{
+  // Request a graceful stop: workers keep taking jobs and return once the queue is empty
+  {
+    std::lock_guard<std::mutex> guard{_mu};
+    _state = State::FINISHING;
+  }
+  _cv.notify_all();
+}
+
+uint32_t WorkQueue::numJobsInQueue()
+{
+  // Read the queue length under the lock; the value may be outdated once returned
+  std::lock_guard<std::mutex> guard{_mu};
+  const uint32_t pending = _functions.size();
+  return pending;
+}
+
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/WorkQueue.h b/runtime/neurun/core/src/exec/WorkQueue.h
new file mode 100644
index 000000000..cdbadfb8f
--- /dev/null
+++ b/runtime/neurun/core/src/exec/WorkQueue.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_WORK_QUEUE_H__
+#define __NEURUN_EXEC_WORK_QUEUE_H__
+
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <queue>
+
+#include "exec/IFunction.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+class WorkQueue
+{
+public:
+ enum class State
+ {
+ ONLINE, // Workers wait for jobs and run them as they arrive
+ FINISHING, // Workers keep taking queued jobs and exit once the queue is empty
+ FORCE_FINISHING // Workers exit right away; the queue is asserted to be empty
+ };
+
+public:
+ /**
+ * @brief Create WorkQueue object
+ */
+ WorkQueue() = default;
+ /**
+ * @brief Destroy WorkQueue object. Sets the state to FORCE_FINISHING and wakes all waiting
+ * threads; it does not join any thread
+ */
+ ~WorkQueue();
+ /**
+ * @brief Thread entry function. Runs queued jobs until the state tells the thread to exit
+ */
+ void operator()();
+ /**
+ * @brief Push the given Task to the job queue
+ *
+ * @param fn Function to be executed(a job)
+ */
+ void enqueue(std::unique_ptr<IFunction> &&fn);
+ /**
+ * @brief Flag as terminating so all the worker threads can terminate
+ */
+ void terminate();
+ /**
+ * @brief Flag as finishing so all the worker threads terminate after the queued jobs are taken
+ */
+ void finish();
+ /**
+ * @brief Get the number of pending jobs. Even if this returns 0, WorkQueue threads may be still
+ * running
+ *
+ * @return Number of jobs waiting in the job queue
+ */
+ uint32_t numJobsInQueue();
+
+private:
+ State _state{State::ONLINE};
+ std::queue<std::unique_ptr<IFunction>> _functions; // Pending jobs in FIFO order
+ std::mutex _mu; // Guards _state and _functions
+ std::condition_variable _cv; // Signalled on new jobs and on state changes
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_WORK_QUEUE_H__
diff --git a/runtime/neurun/core/src/exec/interp/Buffer.h b/runtime/neurun/core/src/exec/interp/Buffer.h
new file mode 100644
index 000000000..d60b59a2f
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/Buffer.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Buffer.h
+ * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class
+ */
+#ifndef __NEURUN_EXEC_INTERP_BUFFER_H__
+#define __NEURUN_EXEC_INTERP_BUFFER_H__
+
+#include <cpp14/memory.h>
+
+#include "ir/Data.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+/**
+ * @brief Interface for writable data area
+ * Extends ir::Data with an accessor that returns a non-const pointer
+ */
+class Buffer : public ir::Data
+{
+public:
+ /**
+ * @brief Return writable pointer for data area
+ * @return Writable pointer
+ */
+ virtual uint8_t *baseWritable(void) const = 0;
+};
+
+/**
+ * @brief Buffer that allocates and owns its data area
+ */
+class InternalBuffer final : public Buffer
+{
+public:
+  /**
+   * @brief Allocate a data area
+   * @param[in] size Size of the data area in bytes
+   */
+  InternalBuffer(size_t size) : _base(nnfw::cpp14::make_unique<uint8_t[]>(size)), _size(size) {}
+
+  size_t size(void) const override { return _size; }
+  const uint8_t *base(void) const override { return _base.get(); }
+  uint8_t *baseWritable(void) const override { return _base.get(); }
+
+private:
+  std::unique_ptr<uint8_t[]> _base; // Owned storage
+  size_t _size;                     // Size of the storage in bytes
+};
+
+/**
+ * @brief Buffer that wraps a data area provided by the caller (not owned)
+ */
+class ExternalBuffer final : public Buffer
+{
+public:
+  /**
+   * @brief Wrap an existing data area
+   * @param[in] base Pointer to the data area
+   * @param[in] size Size of the data area
+   */
+  ExternalBuffer(uint8_t *base, size_t size) : _base(base), _size(size) {}
+
+  size_t size(void) const override { return _size; }
+  const uint8_t *base(void) const override { return _base; }
+  uint8_t *baseWritable(void) const override { return _base; }
+
+private:
+  uint8_t *_base; // Borrowed pointer, never freed by this class
+  size_t _size;   // Size passed at construction
+};
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_INTERP_BUFFER_H__
diff --git a/runtime/neurun/core/src/exec/interp/ExecEnv.h b/runtime/neurun/core/src/exec/interp/ExecEnv.h
new file mode 100644
index 000000000..0f7d45e2a
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/ExecEnv.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ExecEnv.h
+ * @brief This file contains ExecEnv to access interpreter tensor and execution status
+ */
+#ifndef __NEURUN_EXEC_INTERP_EXEC_ENV_H_
+#define __NEURUN_EXEC_INTERP_EXEC_ENV_H_
+
+#include <unordered_set>
+
+#include "ir/Graph.h"
+#include "Tensor.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+/**
+ * @brief Execution environment of an interpreter: the graph to run plus the tensors
+ *        bound to its operands
+ */
+class ExecEnv
+{
+public:
+  /**
+   * @brief Default construction is not allowed: a graph is always required
+   */
+  ExecEnv(void) = delete;
+  /**
+   * @brief Create an environment for a graph
+   * @param[in] graph Graph to execute by interpreter (kept by reference)
+   */
+  explicit ExecEnv(const ir::Graph &graph) : _graph(graph) {}
+
+  /**
+   * @brief Get the graph to execute
+   * @return Graph
+   */
+  const ir::Graph &graph(void) const { return _graph; }
+
+  /**
+   * @brief Register a tensor that already has a buffer or data assigned
+   * @param[in] index  Tensor index
+   * @param[in] tensor Tensor
+   * @note If a tensor is already registered for the index, the existing one is kept
+   */
+  void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor)
+  {
+    assert(tensor->bufferRO() != nullptr);
+    _tensors.emplace(index, tensor);
+  }
+
+  /**
+   * @brief Get the tensor registered for an index
+   * @param[in] index Tensor index
+   * @return Tensor pointer
+   */
+  const ITensor *tensorAt(const ir::OperandIndex index) const { return _tensors.at(index).get(); }
+
+  /**
+   * @brief Check whether a tensor is registered for an index
+   * @param[in] index Tensor index
+   * @return @c true if environment contain tensor, otherwise @c false
+   */
+  bool contains(const ir::OperandIndex index) const { return _tensors.count(index) != 0; }
+
+  /**
+   * @brief Allocate a tensor with an internal buffer using operand info
+   * @param[in] index Tensor index
+   * @param[in] info  Operand info
+   * @note Does nothing if a tensor (allocated or constant) is already registered
+   * @TODO More smart allocation policy
+   */
+  void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info)
+  {
+    if (contains(index))
+    {
+      return;
+    }
+
+    auto allocated = std::make_shared<Tensor>(info);
+    allocated->setBuffer(std::make_shared<InternalBuffer>(allocated->total_size()));
+    assignTensor(index, allocated);
+    _buffers.insert(index);
+  }
+
+  /**
+   * @brief Create a read-only tensor that shares the data of another tensor
+   * @param[in] index          Tensor index
+   * @param[in] info           Operand info
+   * @param[in] index_to_share Tensor index that have data to share
+   * @throw std::runtime_error if no tensor is registered for @c index_to_share
+   * @note Does nothing if a tensor is already registered for @c index
+   */
+  void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info,
+                                const ir::OperandIndex index_to_share)
+  {
+    if (!contains(index_to_share))
+    {
+      throw std::runtime_error{"Cannot find tensor to share data"};
+    }
+
+    if (contains(index))
+    {
+      return;
+    }
+
+    auto shared = std::make_shared<ROTensor>(info);
+    shared->setData(tensorAt(index_to_share)->shareData());
+    assignTensor(index, shared);
+    _buffers.insert(index);
+  }
+
+  /**
+   * @brief Release the data of a tensor created by allocateIfNeeded or
+   *        allocateAndShareIfNeeded
+   * @param[in] index Tensor index
+   * @note Tensors assigned from outside are left untouched
+   */
+  void freeIfAllocated(const ir::OperandIndex index)
+  {
+    if (_buffers.count(index) == 0)
+    {
+      return;
+    }
+
+    _tensors.at(index)->releaseData();
+  }
+
+private:
+  const ir::Graph &_graph;
+  // Tensors usable by the interpreter; each one has a buffer or data assigned
+  std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors;
+  // Indexes of the tensors created by this environment (candidates for freeIfAllocated)
+  std::unordered_set<ir::OperandIndex> _buffers;
+};
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_INTERP_EXEC_ENV_H_
diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.cc b/runtime/neurun/core/src/exec/interp/ExecManager.cc
new file mode 100644
index 000000000..92f182c06
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/ExecManager.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecManager.h"
+#include "ExecEnv.h"
+#include "Interpreter.h"
+
+#include "util/logging.h"
+
+#include <cpp14/memory.h>
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+void ExecManager::execute(const IODescription &desc)
+{
+  // Step 1. Prepare execution model (submodel)
+  //   The model could be executed in divided pieces, but for now the whole inference
+  //   is done by a single interpreter run.
+  //   User-provided input/output buffers are wrapped into interpreter tensors here.
+  ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;
+
+  for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
+  {
+    const auto operand_index = _graph.getInputs().at(ir::IOIndex{n});
+    const auto &source = *desc.inputs.at(n);
+
+    // Inputs are only read, so they are exposed as read-only tensors over external data
+    auto ro_tensor = std::make_shared<ROTensor>(source.info);
+    auto external_data = std::make_shared<const ir::ExternalData>(
+        reinterpret_cast<const uint8_t *>(source.buffer), source.size);
+    ro_tensor->setData(external_data);
+    tensor_map[operand_index] = ro_tensor;
+  }
+
+  for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
+  {
+    const auto operand_index = _graph.getOutputs().at(ir::IOIndex{n});
+    const auto &sink = *desc.outputs.at(n);
+
+    // Outputs are written straight into the user buffer
+    auto rw_tensor = std::make_shared<Tensor>(sink.info);
+    auto external_buffer =
+        std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(sink.buffer), sink.size);
+    rw_tensor->setBuffer(external_buffer);
+    tensor_map[operand_index] = rw_tensor;
+  }
+
+  // Step 2. Prepare execution environment
+  //   The environment is handed over to the interpreter instance invoked below.
+  auto interp_env = nnfw::cpp14::make_unique<ExecEnv>(_graph);
+
+  // Assign input tensor into interpreter execution environment
+  for (auto index : _graph.getInputs())
+  {
+    const auto found = tensor_map.find(index);
+    if (found == tensor_map.end())
+    {
+      continue;
+    }
+
+    VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl;
+    interp_env->assignTensor(index, found->second);
+  }
+
+  // Assign output tensor into interpreter execution environment
+  for (auto index : _graph.getOutputs())
+  {
+    const auto found = tensor_map.find(index);
+    if (found == tensor_map.end())
+    {
+      continue;
+    }
+
+    VERBOSE(INTERPRETER) << "Assign output tensor. operand index: " << index.value() << std::endl;
+    interp_env->assignTensor(index, found->second);
+  }
+
+  // Allocate constant tensor
+  _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+    if (!obj.isConstant())
+    {
+      return;
+    }
+
+    VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
+                         << std::endl;
+
+    auto const_tensor = std::make_shared<ROTensor>(obj.info());
+    // Assume that interpreter's tensor layout is same with model (NHWC)
+    const_tensor->setData(
+        std::make_shared<ir::ExternalData>(obj.data().base(), obj.info().total_size()));
+    interp_env->assignTensor(ind, const_tensor);
+  });
+
+  // Step 3. Invoke interpreter
+  Interpreter interp(std::move(interp_env));
+  interp.run();
+
+  // Step 4. Invoked interpreter run is finished
+  //   If interpreter execute submodel
+  //   1. Get tensor output of submodel into tensor_map to save result
+  //   2. Generate new ExecEnv for next interpretation
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.h b/runtime/neurun/core/src/exec/interp/ExecManager.h
new file mode 100644
index 000000000..f952abf02
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/ExecManager.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ExecManager.h
+ * @brief This file contains ExecManager class\n
+ * to manage interpreter execution and environment
+ */
+#ifndef __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_
+#define __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_
+
+#include "ir/Graph.h"
+#include "exec/IExecutor.h"
+#include "Tensor.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+/**
+ * @brief Class to execute model using interpreter
+ */
+class ExecManager final : public IExecutor
+{
+public:
+ explicit ExecManager(const ir::Graph &graph) : _graph(graph) // graph is kept by reference
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Return graph object
+ * @return Graph object
+ */
+ const ir::Graph &graph() final { return _graph; }
+ void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
+ // Not implemented: the argument is ignored
+ };
+ /**
+ * @brief Start execution
+ * @param[in] desc Description of the input and output buffers to use
+ * @note It should be called after setting input and output buffer
+ */
+ void execute(const IODescription &desc) final;
+
+private:
+ const ir::Graph &_graph; // Graph to execute; referenced, not owned
+ // NOTE(review): execute() builds its own local tensor_map; this member looks unused - confirm
+ ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
+};
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_
diff --git a/runtime/neurun/core/src/exec/interp/Interpreter.cc b/runtime/neurun/core/src/exec/interp/Interpreter.cc
new file mode 100644
index 000000000..8373419f6
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/Interpreter.cc
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Interpreter.h"
+
+#include <stack>
+#include <stdexcept>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "Registration.h"
+
+#include "ir/OperandIndexMap.h"
+#include "util/logging.h"
+#include "ir/OperationVisitor.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+// TODO more structured execution kernel implementation
+// TODO use cker for execution
+// TODO divide tensor prepare and execution
+// TODO introduce memory manager (buffer allocate and free)
+class OperationExecutor : ir::OperationVisitor
+{
+public:
+  /**
+   * @brief Create the executor and register the kernels known to the interpreter
+   * @param[in] env Execution environment passed to every kernel call (not owned)
+   */
+  OperationExecutor(ExecEnv *env) : _env{env}
+  {
+    _kernels[ir::OpCode::Add] = getAdd();
+    _kernels[ir::OpCode::Sub] = getSub();
+    _kernels[ir::OpCode::Mul] = getMul();
+    _kernels[ir::OpCode::Conv2D] = getConv2D();
+    _kernels[ir::OpCode::MaxPool2D] = getMaxPool2D();
+    _kernels[ir::OpCode::Concat] = getConcat();
+    _kernels[ir::OpCode::Gather] = getGather();
+    _kernels[ir::OpCode::AvgPool2D] = getAvgPool2D();
+    _kernels[ir::OpCode::FullyConnected] = getFullyConnected();
+    _kernels[ir::OpCode::InstanceNorm] = getInstanceNorm();
+    _kernels[ir::OpCode::Softmax] = getSoftMax();
+    _kernels[ir::OpCode::Reshape] = getReshape();
+    _kernels[ir::OpCode::DepthwiseConv2D] = getDepthwiseConv();
+    _kernels[ir::OpCode::TransposeConv] = getTransposeConv();
+    _kernels[ir::OpCode::Logistic] = getLogistic();
+    _kernels[ir::OpCode::Pad] = getPad();
+    _kernels[ir::OpCode::ReLU] = getReLU();
+    _kernels[ir::OpCode::ReLU1] = getReLU1();
+    _kernels[ir::OpCode::ReLU6] = getReLU6();
+    _kernels[ir::OpCode::Tanh] = getTanh();
+  }
+
+  /**
+   * @brief Prepare the output operands of an operation and run its kernel
+   * @param[in] idx Index of the operation to execute
+   * @throw std::runtime_error if no kernel is registered for the operation
+   */
+  void execute(const ir::OperationIndex &idx)
+  {
+    const auto &node = _env->graph().operations().at(idx);
+    const auto nodeName = node.name();
+    VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
+                         << " operation (id: " << idx.value() << ")" << std::endl;
+    node.accept(*this);
+  }
+
+private:
+  /**
+   * @brief Find the kernel registered for an operation code
+   * @param[in] code    Operation code
+   * @param[in] message Error message to throw when no kernel is registered
+   * @return Registered kernel, never nullptr
+   * @throw std::runtime_error if the code has no kernel
+   * @note A plain lookup is used on purpose: operator[] would insert a null kernel for an
+   *       unregistered code, which was then dereferenced
+   */
+  const OpKernel *kernelFor(ir::OpCode code, const char *message) const
+  {
+    const auto found = _kernels.find(code);
+    if (found == _kernels.end() || found->second == nullptr)
+    {
+      throw std::runtime_error{message};
+    }
+    return found->second;
+  }
+
+  // One visit() per operation listed in ir/Operations.lst: run the optional prepare step,
+  // then invoke the kernel. Operations without a registered kernel throw.
+#define OP(InternalName)                                                                   \
+  void visit(const ir::operation::InternalName &node) override                             \
+  {                                                                                        \
+    const OpKernel *kernel = kernelFor(                                                    \
+        ir::OpCode::InternalName, "Interpreter: no kernel registered for " #InternalName); \
+    if (kernel->prepare != nullptr)                                                        \
+    {                                                                                      \
+      kernel->prepare(_env, node);                                                         \
+    }                                                                                      \
+    kernel->invoke(_env, node);                                                            \
+  }
+#include "ir/Operations.lst"
+#undef OP
+
+private:
+  ExecEnv *_env;
+  std::unordered_map<ir::OpCode, OpKernel *> _kernels;
+};
+
+/**
+ * Execute the graph of the environment.
+ *
+ * Operands that are ready (model inputs, constants, outputs of executed operations) are kept
+ * on a stack. Each time an operand is popped, the operations using it are checked, and every
+ * operation whose inputs are all ready is executed. Its outputs are then pushed as ready.
+ * Buffers of input operands are released once all of their users have been executed.
+ */
+void Interpreter::run()
+{
+  VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;
+
+  // operand_stack: save operands prepared to use
+  std::stack<ir::OperandIndex> operand_stack;
+
+  // Note: We should push input first, then constant.
+  //       We use use-def for find operators ready to execution,
+  //       but Use-Def cannot handle parameters (maybe constant, but not always)
+  // Note: If all model inputs are constant, it may not work (depend on tensors' order).
+  //       But that scenario may not exist
+  for (auto ind : _env->graph().getInputs())
+  {
+    VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;
+
+    operand_stack.push(ind);
+  }
+
+  _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+    if (obj.isConstant())
+    {
+      VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;
+
+      operand_stack.push(ind);
+    }
+  });
+
+  // Execution
+  std::unordered_set<ir::OperandIndex> ready_check;
+  std::unordered_set<ir::OperationIndex> executed;
+  OperationExecutor executor{_env.get()};
+  while (!operand_stack.empty())
+  {
+    const auto current_operand_index = operand_stack.top();
+    operand_stack.pop();
+    VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
+                         << " is checked ready to use" << std::endl;
+
+    assert(ready_check.find(current_operand_index) == ready_check.end());
+    ready_check.insert(current_operand_index);
+
+    // Find prepared operations by scan use of current operand
+    std::stack<ir::OperationIndex> operation_stack;
+    const auto &operand_uses = _env->graph().operands().at(current_operand_index).getUses();
+    // Remove operation index duplication
+    // If one operation uses same operand tensor for multiple input,
+    // use-list have duplicated operation index.
+    // Visited uses are tracked in a set so that duplicates are skipped even when they are
+    // not adjacent in the list (list::unique() only drops consecutive duplicates).
+    std::unordered_set<ir::OperationIndex> visited_uses;
+    for (const auto &use_operator : operand_uses.list())
+    {
+      if (!visited_uses.insert(use_operator).second)
+      {
+        continue;
+      }
+
+      // Assumption: all parameters are ready to use
+      bool operator_ready = true;
+      for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
+      {
+        if (ready_check.find(input_index) == ready_check.end())
+        {
+          operator_ready = false;
+          break;
+        }
+      }
+
+      if (operator_ready)
+      {
+        VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
+        operation_stack.push(use_operator);
+      }
+    }
+
+    while (!operation_stack.empty())
+    {
+      const auto current_operation_index = operation_stack.top();
+      operation_stack.pop();
+      const auto &current_operation = _env->graph().operations().at(current_operation_index);
+      VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
+                           << current_operation.name() << ")" << std::endl;
+
+      // execution
+      // 1. Prepare output tensor
+      // 2. Call operation kernel
+      executor.execute(current_operation_index);
+      executed.insert(current_operation_index);
+
+      // 3. Push each output into operand stack
+      const auto def_operands = current_operation.getOutputs();
+      for (auto def_operand : def_operands)
+      {
+        VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
+                             << std::endl;
+        operand_stack.push(def_operand);
+      }
+
+      // 4. Free if lifetime of buffer operands used by input is finished
+      for (auto input_index : current_operation.getInputs())
+      {
+        // Bound by reference: the use-list is only read here, no copy is needed
+        const auto &input_uses = _env->graph().operands().at(input_index).getUses();
+        bool dead_buffer = true;
+        for (const auto &use_operator : input_uses.list())
+        {
+          if (executed.find(use_operator) == executed.end())
+          {
+            dead_buffer = false;
+            break;
+          }
+        }
+
+        if (dead_buffer)
+        {
+          _env->freeIfAllocated(input_index);
+        }
+      }
+    }
+  }
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/Interpreter.h b/runtime/neurun/core/src/exec/interp/Interpreter.h
new file mode 100644
index 000000000..1b73592b3
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/Interpreter.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Interpreter.h
+ * @brief This file contains Interpreter class for interpretation
+ */
+#ifndef __NEURUN_EXEC_INTERP_INTERPRETER_H__
+#define __NEURUN_EXEC_INTERP_INTERPRETER_H__
+
+#include "ExecEnv.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+/**
+ * @brief Class for interpretation
+ * Runs the graph held by its execution environment
+ */
+class Interpreter
+{
+
+public:
+ /**
+ * @brief Construct a new Interpreter object (deleted)
+ */
+ Interpreter() = delete;
+ /**
+ * @brief Construct a new Interpreter object
+ * @param[in] env Execution environment variable for interpreter object
+ * (ownership is transferred to the interpreter)
+ */
+ Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Run interpreter until there is no operation to execute
+ */
+ void run();
+
+private:
+ std::unique_ptr<ExecEnv> _env; // Environment owned by this interpreter
+};
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_INTERP_INTERPRETER_H__
diff --git a/runtime/neurun/core/src/exec/interp/Registration.h b/runtime/neurun/core/src/exec/interp/Registration.h
new file mode 100644
index 000000000..3ebe3bc9f
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/Registration.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_INTERP_REGISTRATION_H__
+#define __NEURUN_EXEC_INTERP_REGISTRATION_H__
+
+#include "ExecEnv.h"
+
+#include "ir/Operation.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+struct OpKernel
+{
+ std::function<void(ExecEnv *, const ir::Operation &)> prepare; // Optional step run before invoke; may be empty
+ std::function<void(const ExecEnv *, const ir::Operation &)> invoke; // Runs the operation
+};
+
+// Kernel getters, one per supported operation. Defined in operations/ directory
+OpKernel *getAdd();
+OpKernel *getSub();
+OpKernel *getMul();
+OpKernel *getConv2D();
+OpKernel *getMaxPool2D();
+OpKernel *getConcat();
+OpKernel *getGather();
+OpKernel *getAvgPool2D();
+OpKernel *getFullyConnected();
+OpKernel *getInstanceNorm();
+OpKernel *getSoftMax();
+OpKernel *getDepthwiseConv();
+OpKernel *getReshape();
+OpKernel *getTransposeConv();
+OpKernel *getLogistic();
+OpKernel *getPad();
+OpKernel *getReLU();
+OpKernel *getReLU1();
+OpKernel *getReLU6();
+OpKernel *getTanh();
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_INTERP_REGISTRATION_H__
diff --git a/runtime/neurun/core/src/exec/interp/Tensor.cc b/runtime/neurun/core/src/exec/interp/Tensor.cc
new file mode 100644
index 000000000..5c1da3587
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/Tensor.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#define NO_USE(a) (void)(a)
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+void ITensor::access(const std::function<void(backend::operand::ITensor &tensor)> &fn)
+{
+  // Invoke the callback on this tensor itself
+  backend::operand::ITensor &self = *this;
+  fn(self);
+}
+
+size_t ROTensor::calcOffset(const neurun::util::Coordinates &coords) const
+{
+  // Not implemented: the coordinates are ignored and the call always throws
+  static_cast<void>(coords);
+  throw std::runtime_error{"offset_element_in_bytes is not supported for cpu::Tensor now."};
+}
+
+size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const
+{
+  // Not implemented: the coordinates are ignored and the call always throws
+  static_cast<void>(coords);
+  throw std::runtime_error{"offset_element_in_bytes is not supported for cpu::Tensor now."};
+}
+
+ir::Layout ROTensor::layout() const
+{
+  // TODO Changes to return frontend layout
+  // Until then the layout is fixed to NHWC
+  const auto fixed_layout = ir::Layout::NHWC;
+  return fixed_layout;
+}
+
+ir::Layout Tensor::layout() const
+{
+  // TODO Changes to return frontend layout
+  // Until then the layout is fixed to NHWC
+  const auto fixed_layout = ir::Layout::NHWC;
+  return fixed_layout;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/Tensor.h b/runtime/neurun/core/src/exec/interp/Tensor.h
new file mode 100644
index 000000000..c53fd46a6
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/Tensor.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Tensor.h
+ * @brief This file contains ITensor interface, ROTensor class, and Tensor class
+ */
+#ifndef __NEURUN_EXEC_INTERP_TENSOR_H__
+#define __NEURUN_EXEC_INTERP_TENSOR_H__
+
+#include "Buffer.h"
+
+#include "ir/OperandInfo.h"
+#include "backend/operand/ITensor.h"
+#include "ir/Layout.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+/**
+ * @brief Interface to handle Tensor in interpreter
+ *
+ * Extends the backend tensor interface with buffer/data sharing accessors.
+ * ROTensor and Tensor are the implementations declared in this file.
+ */
+class ITensor : public backend::operand::ITensor
+{
+public:
+ virtual ~ITensor() = default;
+
+public:
+ /**
+ * @brief Return writable buffer pointer
+ * @return Writable buffer pointer
+ * @note ROTensor throws std::runtime_error instead of returning a pointer
+ */
+ virtual uint8_t *buffer() const = 0;
+ /**
+ * @brief Return shared pointer for buffer
+ * @return Buffer shared pointer
+ */
+ virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
+ /**
+ * @brief Return read-only buffer pointer
+ * @return Read-only buffer pointer
+ */
+ virtual const uint8_t *bufferRO() const = 0;
+ /**
+ * @brief Return shared pointer for data
+ * @return Data shared pointer
+ */
+ virtual std::shared_ptr<const ir::Data> shareData() const = 0;
+ /**
+ * @brief Set internal/external buffer
+ * @param[in] buffer Buffer pointer
+ */
+ virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
+ /**
+ * @brief Set data reference (including constant, input)
+ * @param[in] data Data pointer
+ */
+ virtual void setData(std::shared_ptr<const ir::Data> data) = 0;
+ /**
+ * @brief Drop the reference to the data or buffer held by this tensor
+ */
+ virtual void releaseData() = 0;
+
+ /**
+ * @brief Return total size of the tensor as reported by its operand info
+ * @return Total size (presumably in bytes - TODO confirm against ir::OperandInfo)
+ */
+ virtual size_t total_size() const = 0;
+ /**
+ * @brief Return the extent of one dimension
+ * @param[in] index Dimension index
+ * @return Extent of the dimension
+ */
+ virtual size_t dimension(size_t index) const = 0;
+ /**
+ * @brief Return rank of the tensor shape
+ * @return Number of dimensions
+ */
+ virtual size_t num_dimensions() const = 0;
+ /**
+ * @brief Return offset for the given coordinates
+ * @param[in] coords Coordinates of an element
+ * @note Both ROTensor and Tensor currently throw std::runtime_error
+ */
+ virtual size_t calcOffset(const util::Coordinates &coords) const = 0;
+
+ /**
+ * @brief Return whether the tensor has padding
+ * @note Both ROTensor and Tensor always return false
+ */
+ virtual bool has_padding() const = 0;
+ /**
+ * @brief Return data type of tensor
+ * @return Data type of tensor
+ */
+ virtual ir::DataType data_type() const = 0;
+ /**
+ * @brief Return TensorInfo
+ * @return TensorInfo
+ */
+ virtual const ir::OperandInfo &tensorInfo() const = 0;
+ /**
+ * @brief Return number of elements
+ * @return Number of elements
+ */
+ virtual uint64_t num_elements() const = 0;
+ /**
+ * @brief Invoke the given function with this tensor
+ * @param[in] fn Function to call
+ */
+ void access(const std::function<void(backend::operand::ITensor &tensor)> &fn) final;
+};
+
+/**
+ * @brief Class to handle tensor in interpreter as read-only
+ *
+ * The tensor only keeps a shared reference to constant data; writable access
+ * (buffer(), shareBuffer()) is rejected with std::runtime_error.
+ */
+class ROTensor final : public ITensor
+{
+public:
+  ROTensor() = delete;
+  /**
+   * @brief Construct a read-only tensor without data
+   * @param[in] info Operand info (copied into the tensor)
+   */
+  ROTensor(const ir::OperandInfo &info) : _info(info)
+  {
+    // DO NOTHING
+  }
+
+public:
+  /** @throw std::runtime_error Always: this tensor is read-only */
+  uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; }
+  /** @throw std::runtime_error Always: this tensor is read-only */
+  std::shared_ptr<const Buffer> shareBuffer() const override
+  {
+    throw std::runtime_error{"Read only tensor"};
+  }
+  /**
+   * @brief Return read-only pointer to the referenced data
+   * @throw std::runtime_error If no data is set (never set, or released by releaseData())
+   */
+  const uint8_t *bufferRO() const override
+  {
+    // Dereferencing an empty shared_ptr is undefined behavior: fail loudly instead
+    if (_data == nullptr)
+    {
+      throw std::runtime_error{"Read only tensor has no data"};
+    }
+    return _data->base();
+  }
+  std::shared_ptr<const ir::Data> shareData() const override { return _data; }
+  // A buffer is kept through the same read-only data reference
+  void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; }
+  void setData(std::shared_ptr<const ir::Data> data) override { _data = data; }
+  void releaseData() override { _data = nullptr; }
+
+  size_t total_size() const override { return _info.total_size(); }
+  size_t dimension(size_t index) const override { return _info.shape().dim(index); }
+  size_t num_dimensions() const override { return _info.shape().rank(); }
+  size_t calcOffset(const util::Coordinates &coords) const override;
+  ir::Layout layout() const override;
+  bool has_padding() const override { return false; }
+  ir::DataType data_type() const override { return _info.typeInfo().type(); }
+  const ir::OperandInfo &tensorInfo() const override { return _info; }
+  uint64_t num_elements() const override { return _info.shape().num_elements(); }
+
+private:
+  const ir::OperandInfo _info;
+  std::shared_ptr<const ir::Data> _data{nullptr};
+};
+
+/**
+ * @brief Class to handle tensor in interpreter as writable
+ *
+ * The tensor keeps a shared reference to a Buffer; setData() is rejected because
+ * plain data may be read-only.
+ */
+class Tensor final : public ITensor
+{
+public:
+  Tensor() = delete;
+  /**
+   * @brief Construct a writable tensor without buffer
+   * @param[in] info Operand info (copied into the tensor)
+   */
+  Tensor(const ir::OperandInfo &info) : _info(info)
+  {
+    // DO NOTHING
+  }
+
+public:
+  /**
+   * @brief Return writable pointer to the buffer
+   * @throw std::runtime_error If no buffer is set (never set, or released by releaseData())
+   */
+  uint8_t *buffer() const override
+  {
+    // Dereferencing an empty shared_ptr is undefined behavior: fail loudly instead
+    if (_buffer == nullptr)
+    {
+      throw std::runtime_error{"Tensor has no buffer"};
+    }
+    return _buffer->baseWritable();
+  }
+  std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; }
+  /**
+   * @brief Return read-only pointer to the buffer
+   * @throw std::runtime_error If no buffer is set (never set, or released by releaseData())
+   */
+  const uint8_t *bufferRO() const override
+  {
+    if (_buffer == nullptr)
+    {
+      throw std::runtime_error{"Tensor has no buffer"};
+    }
+    return _buffer->base();
+  }
+  std::shared_ptr<const ir::Data> shareData() const override { return _buffer; }
+  void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; }
+  /** @throw std::runtime_error Always: only a Buffer can back a writable tensor */
+  void setData(std::shared_ptr<const ir::Data>) override
+  {
+    throw std::runtime_error{"Passed data may read-only"};
+  }
+  void releaseData() override { _buffer = nullptr; }
+
+  size_t total_size() const override { return _info.total_size(); }
+  size_t dimension(size_t index) const override { return _info.shape().dim(index); }
+  size_t num_dimensions() const override { return _info.shape().rank(); }
+  size_t calcOffset(const util::Coordinates &coords) const override;
+  ir::Layout layout() const override;
+  bool has_padding() const override { return false; }
+  ir::DataType data_type() const override { return _info.typeInfo().type(); }
+  const ir::OperandInfo &tensorInfo() const override { return _info; }
+  uint64_t num_elements() const override { return _info.shape().num_elements(); }
+
+private:
+  const ir::OperandInfo _info;
+  std::shared_ptr<const Buffer> _buffer{nullptr};
+};
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_INTERP_TENSOR_H__
diff --git a/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc
new file mode 100644
index 000000000..bd396491f
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/AveragePool.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/AvgPool2D.h"
+#include "util/Utils.h"
+#include "util/Padding.h"
+#include "util/ShapeInference.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace avgpool2d
+{
+
+/**
+ * @brief Check the input tensor and make sure the output tensor of AvgPool2D is allocated
+ * @param[in] env  Execution environment holding graph and tensors
+ * @param[in] node AvgPool2D operation node
+ */
+void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
+{
+  const auto ifm_index = node.getInputs().at(0);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm = env->tensorAt(ifm_index);
+  UNUSED_RELEASE(ifm);
+
+  assert(ifm->num_dimensions() == 4);
+
+  const auto declared_info = env->graph().operands().at(ofm_index).info();
+  if (declared_info.total_size() != 0)
+  {
+    // Output shape is specified by the graph operand
+    env->allocateIfNeeded(ofm_index, declared_info);
+  }
+  else
+  {
+    // Output shape is unspecified: infer it from the input shape and the pooling parameters
+    const auto &pool_node =
+        nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
+    const auto inferred_shapes =
+        shape_inference::inferAvgPoolShape(ifm->tensorInfo().shape(), pool_node.param());
+    env->allocateIfNeeded(ofm_index, {inferred_shapes[0], declared_info.typeInfo()});
+  }
+
+  auto ofm = env->tensorAt(ofm_index);
+  UNUSED_RELEASE(ofm);
+
+  // Only the case where input and output share one data type is handled
+  assert(ifm->data_type() == ofm->data_type());
+  assert(ofm->num_dimensions() == 4);
+}
+
+/**
+ * @brief Run float average pooling with the cker kernel
+ * @param[in] in_tensor  Input feature map
+ * @param[in] out_tensor Output feature map; the result is written through its buffer
+ * @param[in] param      AvgPool2D parameters (padding, stride, kernel size, activation)
+ */
+void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
+            const ir::operation::AvgPool2D::Param &param)
+{
+  // TODO Support NCHW frontend
+  const auto ifm_feature = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_feature = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  const auto pad = neurun::util::calculatePadding(param.padding, ifm_feature, ofm_feature,
+                                                  param.stride, param.kw, param.kh);
+
+  // Fill kernel parameters
+  nnfw::cker::PoolParams pool_params;
+  calculateActivationRange(param.activation, &pool_params.float_activation_min,
+                           &pool_params.float_activation_max);
+  pool_params.filter_width = param.kw;
+  pool_params.filter_height = param.kh;
+  pool_params.padding_values.width = pad.left;
+  pool_params.padding_values.height = pad.top;
+  pool_params.stride_width = param.stride.horizontal;
+  pool_params.stride_height = param.stride.vertical;
+
+  const auto cker_in_shape = convertShape(in_tensor->tensorInfo().shape());
+  const auto cker_out_shape = convertShape(out_tensor->tensorInfo().shape());
+  const float *src = reinterpret_cast<const float *>(in_tensor->bufferRO());
+  float *dst = reinterpret_cast<float *>(out_tensor->buffer());
+
+  nnfw::cker::AveragePool(pool_params, cker_in_shape, src, cker_out_shape, dst);
+}
+
+/**
+ * @brief Execute AvgPool2D for the given node
+ * @param[in] env  Execution environment holding the tensors
+ * @param[in] node AvgPool2D operation node
+ * @throw std::runtime_error If the input data type is not FLOAT32
+ */
+void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &pool_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
+
+  const auto ifm_index = node.getInputs().at(0);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm = env->tensorAt(ifm_index);
+  const auto ofm = env->tensorAt(ofm_index);
+
+  // Only float input is implemented
+  if (ifm->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"NYI: Support float only"};
+  }
+
+  invoke(ifm, ofm, pool_node.param());
+}
+} // namespace avgpool2d
+
+/**
+ * @brief Return the interpreter kernel (prepare/invoke pair) for AvgPool2D
+ * @return Pointer to a function-local static kernel object
+ */
+OpKernel *getAvgPool2D()
+{
+  static OpKernel avgpool_kernel{avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
+  return &avgpool_kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc
new file mode 100644
index 000000000..16469b9db
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/Add.h"
+#include "ir/operation/Sub.h"
+#include "ir/operation/Mul.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Selects which element-wise binary function the invoke() template applies
+enum class OpType
+{
+ ADD, // a + b
+ SUB, // a - b
+ MUL // a * b
+};
+
+/**
+ * @brief Check input types and make sure the output tensor of a binary arithmetic op exists
+ * @tparam node_type Concrete operation type providing LHS and RHS input indices
+ * @param[in] env  Execution environment holding the tensors
+ * @param[in] node Operation node
+ * @throw std::runtime_error If input types differ, the shapes cannot be broadcast,
+ *                           or the output type differs from the input type
+ */
+template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node)
+{
+  const auto &arith_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
+
+  const auto lhs_index = node.getInputs().at(arith_node.LHS);
+  const auto rhs_index = node.getInputs().at(arith_node.RHS);
+  const auto out_index = node.getOutputs().at(0);
+
+  const auto lhs = env->tensorAt(lhs_index);
+  const auto rhs = env->tensorAt(rhs_index);
+
+  // Both operands must have the same data type
+  // TODO Util function to compare TensorInfo
+  if (lhs->data_type() != rhs->data_type())
+  {
+    throw std::runtime_error{"Interp(Add): Different input types"};
+  }
+
+  // Allocation below is skipped by the environment when the output already exists
+  // (ex. model output)
+  if (lhs->tensorInfo().shape() != rhs->tensorInfo().shape())
+  {
+    // Shapes differ: the output takes the broadcast shape
+    bool broadcastable = true;
+    auto broadcast_shape =
+        calcBroadcastShape(lhs->tensorInfo().shape(), rhs->tensorInfo().shape(), broadcastable);
+    if (!broadcastable)
+    {
+      throw std::runtime_error{"Interp(Add): Fail to brodcasting"};
+    }
+
+    env->allocateIfNeeded(out_index,
+                          ir::OperandInfo(broadcast_shape, lhs->tensorInfo().typeInfo()));
+  }
+  else
+  {
+    // Shapes match: the output has the same shape and type as the input
+    auto same_info = lhs->tensorInfo();
+    env->allocateIfNeeded(out_index, same_info);
+  }
+
+  // The output must have the same data type as the inputs
+  // TODO Util function to compare TensorInfo
+  auto out = env->tensorAt(out_index);
+  if (lhs->data_type() != out->data_type())
+  {
+    throw std::runtime_error{"Interp(Add): Invalid output type"};
+  }
+}
+
+// Store a float activation range into the float fields of the cker parameter
+inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
+{
+  auto &target = *params;
+  target.float_activation_min = min;
+  target.float_activation_max = max;
+}
+
+// Store an int32 activation range into the quantized fields of the cker parameter
+inline void setActivationParams(int32_t min, int32_t max,
+                                nnfw::cker::BinaryArithmeticOpParam *params)
+{
+  auto &target = *params;
+  target.quantized_activation_min = min;
+  target.quantized_activation_max = max;
+}
+
+/**
+ * @brief Run an element-wise binary arithmetic operation with the cker kernels
+ * @tparam raw_type   Element type of the tensor buffers
+ * @tparam param_type Parameter type of the operation (provides the activation)
+ * @tparam op_type    Operation to apply
+ * @param[in] lhs_tensor Left-hand side input
+ * @param[in] rhs_tensor Right-hand side input
+ * @param[in] out_tensor Output tensor; the result is written through its buffer
+ * @param[in] param      Operation parameters
+ */
+template <typename raw_type, typename param_type, OpType op_type>
+void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
+            const param_type &param)
+{
+  using binary_fn = std::function<raw_type(const raw_type &, const raw_type &)>;
+
+  const auto lhs_raw = lhs_tensor->bufferRO();
+  const auto rhs_raw = rhs_tensor->bufferRO();
+  auto out_raw = out_tensor->buffer();
+
+  nnfw::cker::BinaryArithmeticOpParam op_params;
+  raw_type act_min, act_max;
+  calculateActivationRange(param.activation, &act_min, &act_max);
+  setActivationParams(act_min, act_max, &op_params);
+  const raw_type *lhs_data = reinterpret_cast<const raw_type *>(lhs_raw);
+  const raw_type *rhs_data = reinterpret_cast<const raw_type *>(rhs_raw);
+  raw_type *out_data = reinterpret_cast<raw_type *>(out_raw);
+
+  // Pick the element-wise function for this instantiation (anything but ADD/SUB is MUL)
+  binary_fn selected;
+  switch (op_type)
+  {
+    case OpType::ADD:
+      selected = [](const raw_type &a, const raw_type &b) { return a + b; };
+      break;
+    case OpType::SUB:
+      selected = [](const raw_type &a, const raw_type &b) { return a - b; };
+      break;
+    default:
+      selected = [](const raw_type &a, const raw_type &b) { return a * b; };
+      break;
+  }
+  const binary_fn &fn = selected;
+
+  if (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape())
+  {
+    // Different shapes: use the broadcasting kernel on extended shapes
+    const auto lhs_ext = convertExtendShape(lhs_tensor->tensorInfo().shape());
+    const auto rhs_ext = convertExtendShape(rhs_tensor->tensorInfo().shape());
+    const auto out_ext = convertExtendShape(out_tensor->tensorInfo().shape());
+    nnfw::cker::BroadcastBinaryArithmeticOpSlow(op_params, lhs_ext, lhs_data, rhs_ext, rhs_data,
+                                                out_ext, out_data, fn);
+  }
+  else
+  {
+    // Same shapes: plain element-wise kernel
+    const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
+    const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
+    const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+    nnfw::cker::BinaryArithmeticOp(op_params, lhs_shape, lhs_data, rhs_shape, rhs_data,
+                                   out_shape, out_data, fn);
+  }
+}
+
+/**
+ * @brief Execute a binary arithmetic operation, dispatching on the input data type
+ * @tparam node_type  Concrete operation type providing LHS and RHS input indices
+ * @tparam param_type Parameter type of the operation
+ * @tparam op_type    Operation to apply
+ * @param[in] env  Execution environment holding the tensors
+ * @param[in] node Operation node
+ * @throw std::runtime_error If the data type is neither INT32 nor FLOAT32
+ */
+template <typename node_type, typename param_type, OpType op_type>
+void invokeAdd(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &arith_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
+
+  const auto lhs_index = node.getInputs().at(arith_node.LHS);
+  const auto rhs_index = node.getInputs().at(arith_node.RHS);
+  const auto out_index = node.getOutputs().at(0);
+  const auto lhs = env->tensorAt(lhs_index);
+  const auto rhs = env->tensorAt(rhs_index);
+  const auto out = env->tensorAt(out_index);
+
+  // The element type is taken from the left-hand side input
+  switch (lhs->data_type())
+  {
+    case ir::DataType::INT32:
+      invoke<int32_t, param_type, op_type>(lhs, rhs, out, arith_node.param());
+      break;
+    case ir::DataType::FLOAT32:
+      invoke<float, param_type, op_type>(lhs, rhs, out, arith_node.param());
+      break;
+    default:
+      throw std::runtime_error{"NYI: Unsupported data type"};
+  }
+}
+} // namespace
+
+/**
+ * @brief Return the interpreter kernel (prepare/invoke pair) for Add
+ * @return Pointer to a function-local static kernel object
+ */
+OpKernel *getAdd()
+{
+  using Node = ir::operation::Add;
+  static OpKernel add_kernel{prepareAdd<Node>, invokeAdd<Node, Node::Param, OpType::ADD>};
+  return &add_kernel;
+}
+
+/**
+ * @brief Return the interpreter kernel (prepare/invoke pair) for Sub
+ * @return Pointer to a function-local static kernel object
+ */
+OpKernel *getSub()
+{
+  using Node = ir::operation::Sub;
+  static OpKernel sub_kernel{prepareAdd<Node>, invokeAdd<Node, Node::Param, OpType::SUB>};
+  return &sub_kernel;
+}
+
+/**
+ * @brief Return the interpreter kernel (prepare/invoke pair) for Mul
+ * @return Pointer to a function-local static kernel object
+ */
+OpKernel *getMul()
+{
+  using Node = ir::operation::Mul;
+  static OpKernel mul_kernel{prepareAdd<Node>, invokeAdd<Node, Node::Param, OpType::MUL>};
+  return &mul_kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Concat.cc b/runtime/neurun/core/src/exec/interp/operations/Concat.cc
new file mode 100644
index 000000000..a127e5f30
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/Concat.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Concatenation.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/Concat.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace concat
+{
+
+/**
+ * @brief Check the inputs of Concat and make sure the output tensor is allocated
+ * @param[in] env  Execution environment holding the tensors
+ * @param[in] node Concat operation node
+ */
+void prepareConcat(ExecEnv *env, const ir::Operation &node)
+{
+  const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
+
+  const auto first_index = node.getInputs().at(0);
+  const auto out_index = node.getOutputs().at(0);
+
+  const auto first = env->tensorAt(first_index);
+  uint32_t axis_extent = 0;
+  // A negative axis counts from the last dimension
+  const int32_t signed_axis = concat_node.param().axis;
+  const uint32_t axis =
+      (signed_axis < 0) ? (signed_axis + first->num_dimensions()) : signed_axis;
+
+  // Every input must match the first one in rank, type and all dimensions except the axis;
+  // the extents along the axis are summed up
+  for (auto input : node.getInputs())
+  {
+    assert(first->num_dimensions() == env->tensorAt(input)->num_dimensions());
+    assert(first->data_type() == env->tensorAt(input)->data_type());
+    for (uint32_t dim = 0; dim < first->num_dimensions(); dim++)
+    {
+      if (dim != axis)
+      {
+        assert(first->dimension(dim) == env->tensorAt(input)->dimension(dim));
+      }
+      else
+      {
+        axis_extent += env->tensorAt(input)->dimension(dim);
+      }
+    }
+  }
+
+  // The output takes the first input's shape with the accumulated extent on the axis
+  auto concat_shape = first->tensorInfo().shape();
+  concat_shape.dim(axis) = axis_extent;
+  env->allocateIfNeeded(out_index, ir::OperandInfo{concat_shape, first->tensorInfo().typeInfo()});
+
+  auto out = env->tensorAt(out_index);
+  UNUSED_RELEASE(out);
+
+  // The output must match the inputs in type and in all dimensions except the axis
+  assert(first->data_type() == out->data_type());
+  for (uint32_t dim = 0; dim < first->num_dimensions(); dim++)
+  {
+    if (dim != axis)
+    {
+      assert(first->dimension(dim) == out->dimension(dim));
+    }
+  }
+}
+
+/**
+ * @brief Concatenate float tensors along the given axis with the cker kernel
+ * @param[in] in_tensors Input tensors to concatenate
+ * @param[in] out_tensor Output tensor; the result is written through its buffer
+ * @param[in] axis       Non-negative axis to concatenate along
+ */
+void invoke(const std::vector<const ITensor *> &in_tensors, const ITensor *out_tensor,
+            uint32_t axis)
+{
+  const uint32_t count = in_tensors.size();
+
+  // Calculate
+  nnfw::cker::ConcatenationParams cker_param;
+  cker_param.axis = static_cast<int8_t>(axis);
+  cker_param.inputs_count = count;
+
+  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+
+  // in_shape_ptrs stores addresses of in_shapes elements, so in_shapes must not reallocate
+  // while it is filled: reserve the final size up front
+  std::vector<nnfw::cker::Shape> in_shapes;
+  std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
+  std::vector<const float *> in_ptrs;
+  in_shapes.reserve(count);
+  in_shape_ptrs.reserve(count);
+  in_ptrs.reserve(count);
+  for (uint32_t i = 0; i < count; i++)
+  {
+    in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
+    in_shape_ptrs.push_back(&in_shapes[i]);
+    in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
+  }
+
+  auto out_buffer = out_tensor->buffer();
+  float *out_ptr = reinterpret_cast<float *>(out_buffer);
+
+  nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
+                                   out_ptr);
+}
+
+/**
+ * @brief Execute Concat for the given node
+ * @param[in] env  Execution environment holding the tensors
+ * @param[in] node Concat operation node
+ * @throw std::runtime_error If the first input's data type is not FLOAT32
+ */
+void invokeConcat(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
+  const int32_t signed_axis = concat_node.param().axis;
+
+  // Collect all input tensors in node input order
+  std::vector<const ITensor *> inputs;
+  for (const auto &index : concat_node.getInputs())
+  {
+    inputs.emplace_back(env->tensorAt(index));
+  }
+
+  const auto out_index = node.getOutputs().at(0);
+  const auto output = env->tensorAt(out_index);
+  // A negative axis counts from the last dimension of the output
+  const uint32_t axis =
+      (signed_axis < 0) ? (signed_axis + output->num_dimensions()) : signed_axis;
+
+  // Only float input is implemented
+  if (inputs[0]->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"NYI: Support float32 only"};
+  }
+
+  invoke(inputs, output, axis);
+}
+} // namespace concat
+
+/**
+ * @brief Return the interpreter kernel (prepare/invoke pair) for Concat
+ * @return Pointer to a function-local static kernel object
+ */
+OpKernel *getConcat()
+{
+  static OpKernel concat_kernel{concat::prepareConcat, concat::invokeConcat};
+  return &concat_kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc
new file mode 100644
index 000000000..5242247a4
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Conv.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/Conv2D.h"
+#include "util/Utils.h"
+#include "util/Padding.h"
+#include "util/ShapeInference.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace conv2d
+{
+
+/**
+ * @brief Check the inputs of Conv2D and make sure the output tensor is allocated
+ * @param[in] env  Execution environment holding graph and tensors
+ * @param[in] node Conv2D operation node
+ */
+void prepareConv2D(ExecEnv *env, const ir::Operation &node)
+{
+  const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
+  const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
+  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm = env->tensorAt(ifm_index);
+  const auto ker = env->tensorAt(ker_index);
+  const auto bias = env->tensorAt(bias_index);
+
+  assert(ifm->num_dimensions() == 4);
+  assert(ker->num_dimensions() == 4);
+  assert(bias->num_dimensions() == 1);
+
+  UNUSED_RELEASE(ifm);
+  UNUSED_RELEASE(ker);
+  UNUSED_RELEASE(bias);
+
+  const auto declared_info = env->graph().operands().at(ofm_index).info();
+  if (declared_info.total_size() != 0)
+  {
+    // Output shape is specified by the graph operand
+    env->allocateIfNeeded(ofm_index, declared_info);
+  }
+  else
+  {
+    // Output shape is unspecified: infer it from input shape, kernel shape and parameters
+    const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
+    const auto inferred_shapes = shape_inference::inferConv2DShape(
+        ifm->tensorInfo().shape(), ker->tensorInfo().shape(), conv_node.param());
+    env->allocateIfNeeded(ofm_index, {inferred_shapes[0], declared_info.typeInfo()});
+  }
+
+  auto ofm = env->tensorAt(ofm_index);
+  UNUSED_RELEASE(ofm);
+
+  // Only the case where input and output share one data type is handled
+  assert(ifm->data_type() == ofm->data_type());
+  assert(ofm->num_dimensions() == 4);
+}
+
+/**
+ * @brief Run float 2D convolution with the cker kernel
+ * @param[in] ifm_tensor  Input feature map
+ * @param[in] ker_tensor  Kernel tensor
+ * @param[in] bias_tensor Bias tensor
+ * @param[in] ofm_tensor  Output feature map; the result is written through its buffer
+ * @param[in] param       Conv2D parameters (padding, stride, activation)
+ */
+void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
+            const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
+{
+  // TODO Support NCHW frontend
+  const auto ifm_feature = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_feature = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+  const auto &kernel_dims = ker_tensor->tensorInfo().shape();
+  const auto kernel_h = kernel_dims.dim(1);
+  const auto kernel_w = kernel_dims.dim(2);
+  const auto pad = neurun::util::calculatePadding(param.padding, ifm_feature, ofm_feature,
+                                                  param.stride, kernel_w, kernel_h);
+
+  // Activation range used to clamp the output
+  float act_min, act_max;
+  calculateActivationRange(param.activation, &act_min, &act_max);
+
+  // Fill kernel parameters; dilation is fixed to 1
+  nnfw::cker::ConvParams conv_params;
+  conv_params.padding_values.width = pad.left;
+  conv_params.padding_values.height = pad.top;
+  conv_params.stride_width = param.stride.horizontal;
+  conv_params.stride_height = param.stride.vertical;
+  conv_params.dilation_width_factor = 1;
+  conv_params.dilation_height_factor = 1;
+  conv_params.float_activation_min = act_min;
+  conv_params.float_activation_max = act_max;
+
+  const auto ifm_cker_shape = convertShape(ifm_tensor->tensorInfo().shape());
+  const auto ker_cker_shape = convertShape(ker_tensor->tensorInfo().shape());
+  const auto bias_cker_shape = convertShape(bias_tensor->tensorInfo().shape());
+  const auto ofm_cker_shape = convertShape(ofm_tensor->tensorInfo().shape());
+  const float *ifm_data = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
+  const float *ker_data = reinterpret_cast<const float *>(ker_tensor->bufferRO());
+  const float *bias_data = reinterpret_cast<const float *>(bias_tensor->bufferRO());
+  float *ofm_data = reinterpret_cast<float *>(ofm_tensor->buffer());
+
+  nnfw::cker::Conv(conv_params, ifm_cker_shape, ifm_data, ker_cker_shape, ker_data,
+                   bias_cker_shape, bias_data, ofm_cker_shape, ofm_data);
+}
+
+/**
+ * @brief Execute Conv2D for the given node
+ * @param[in] env  Execution environment holding the tensors
+ * @param[in] node Conv2D operation node
+ * @throw std::runtime_error If the input data type is not FLOAT32
+ */
+void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
+
+  const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
+  const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
+  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm = env->tensorAt(ifm_index);
+  const auto ker = env->tensorAt(ker_index);
+  const auto bias = env->tensorAt(bias_index);
+  const auto ofm = env->tensorAt(ofm_index);
+
+  // Only float input is implemented
+  if (ifm->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"NYI: Support float32 only"};
+  }
+
+  invoke(ifm, ker, bias, ofm, conv_node.param());
+}
+} // namespace conv2d
+
+/**
+ * @brief Return the interpreter kernel (prepare/invoke pair) for Conv2D
+ * @return Pointer to a function-local static kernel object
+ */
+OpKernel *getConv2D()
+{
+  static OpKernel conv_kernel{conv2d::prepareConv2D, conv2d::invokeConv2D};
+  return &conv_kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc b/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc
new file mode 100644
index 000000000..1d3649f48
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/DepthwiseConv.h>
+#include <misc/polymorphic_downcast.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/DepthwiseConv2D.h"
+#include "util/Padding.h"
+#include "util/Utils.h"
+#include "util/ShapeInference.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+namespace
+{
+
+/**
+ * @brief Check the inputs of DepthwiseConv2D and make sure the output tensor is allocated
+ * @param[in] env  Execution environment holding graph and tensors
+ * @param[in] node DepthwiseConv2D operation node
+ */
+void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node)
+{
+  const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
+  const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
+  const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm = env->tensorAt(ifm_index);
+  const auto ker = env->tensorAt(ker_index);
+  const auto bias = env->tensorAt(bias_index);
+
+  assert(ifm->num_dimensions() == 4);
+  assert(ker->num_dimensions() == 4);
+  assert(bias->num_dimensions() == 1);
+
+  UNUSED_RELEASE(ifm);
+  UNUSED_RELEASE(ker);
+  UNUSED_RELEASE(bias);
+
+  const auto declared_info = env->graph().operands().at(ofm_index).info();
+  if (declared_info.total_size() != 0)
+  {
+    // Output shape is specified by the graph operand
+    env->allocateIfNeeded(ofm_index, declared_info);
+  }
+  else
+  {
+    // Output shape is unspecified: infer it from input shape, kernel shape and parameters
+    const auto &dw_node =
+        nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
+    const auto inferred_shapes = shape_inference::inferDepthwiseConv2DShape(
+        ifm->tensorInfo().shape(), ker->tensorInfo().shape(), dw_node.param());
+    env->allocateIfNeeded(ofm_index, {inferred_shapes[0], declared_info.typeInfo()});
+  }
+
+  auto ofm = env->tensorAt(ofm_index);
+  UNUSED_RELEASE(ofm);
+
+  // Only the case where input and output share one data type is handled
+  assert(ifm->data_type() == ofm->data_type());
+  assert(ofm->num_dimensions() == 4);
+}
+
+/**
+ * @brief Run float depthwise 2D convolution with the cker kernel
+ * @param[in] ifm_tensor  Input feature map
+ * @param[in] ker_tensor  Kernel tensor
+ * @param[in] bias_tensor Bias tensor
+ * @param[in] ofm_tensor  Output feature map; the result is written through its buffer
+ * @param[in] param       DepthwiseConv2D parameters (padding, stride, multiplier, activation)
+ */
+void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
+            const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param &param)
+{
+  // TODO Support NCHW frontend
+  const auto ifm_feature = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_feature = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &kernel_dims = ker_tensor->tensorInfo().shape();
+  const auto kernel_h = kernel_dims.dim(1);
+  const auto kernel_w = kernel_dims.dim(2);
+  const auto pad = neurun::util::calculatePadding(param.padding, ifm_feature, ofm_feature,
+                                                  param.stride, kernel_w, kernel_h);
+
+  // Activation range used to clamp the output
+  float act_min, act_max;
+  calculateActivationRange(param.activation, &act_min, &act_max);
+
+  // Fill kernel parameters; dilation is fixed to 1
+  nnfw::cker::DepthwiseConvParams dw_params;
+  dw_params.padding_values.width = pad.left;
+  dw_params.padding_values.height = pad.top;
+  dw_params.depth_multiplier = param.multiplier;
+  dw_params.stride_width = param.stride.horizontal;
+  dw_params.stride_height = param.stride.vertical;
+  dw_params.dilation_width_factor = 1;
+  dw_params.dilation_height_factor = 1;
+  dw_params.float_activation_min = act_min;
+  dw_params.float_activation_max = act_max;
+
+  const auto ifm_cker_shape = convertShape(ifm_tensor->tensorInfo().shape());
+  const auto ker_cker_shape = convertShape(ker_tensor->tensorInfo().shape());
+  const auto bias_cker_shape = convertShape(bias_tensor->tensorInfo().shape());
+  const auto ofm_cker_shape = convertShape(ofm_tensor->tensorInfo().shape());
+  const float *ifm_data = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
+  const float *ker_data = reinterpret_cast<const float *>(ker_tensor->bufferRO());
+  const float *bias_data = reinterpret_cast<const float *>(bias_tensor->bufferRO());
+  float *ofm_data = reinterpret_cast<float *>(ofm_tensor->buffer());
+
+  nnfw::cker::DepthwiseConv(dw_params, ifm_cker_shape, ifm_data, ker_cker_shape, ker_data,
+                            bias_cker_shape, bias_data, ofm_cker_shape, ofm_data);
+}
+
+/**
+ * @brief Execute DepthwiseConv2D for the given node
+ * @param[in] env  Execution environment holding the tensors
+ * @param[in] node DepthwiseConv2D operation node
+ * @throw std::runtime_error If the input data type is not FLOAT32
+ */
+void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
+{
+  // Use the same downcast helper as prepareDepthwiseConv instead of an unchecked static_cast
+  const auto &conv_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
+
+  const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
+  const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
+  const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm_tensor = env->tensorAt(ifm_index);
+  const auto ker_tensor = env->tensorAt(ker_index);
+  const auto bias_tensor = env->tensorAt(bias_index);
+  const auto ofm_tensor = env->tensorAt(ofm_index);
+
+  // Only float input is implemented
+  const auto data_type = ifm_tensor->data_type();
+  if (data_type == ir::DataType::FLOAT32)
+  {
+    invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
+  }
+  else
+  {
+    throw std::runtime_error{"NYI: Support float32 only"};
+  }
+}
+
+} // namespace
+
+/**
+ * @brief Return the interpreter kernel (prepare/invoke pair) for DepthwiseConv2D
+ * @return Pointer to a function-local static kernel object
+ */
+OpKernel *getDepthwiseConv()
+{
+  static OpKernel depthwise_kernel{prepareDepthwiseConv, invokeDepthwiseConv};
+  return &depthwise_kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc
new file mode 100644
index 000000000..9c1c5d4e2
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/FullyConnected.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/FullyConnected.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace fc
+{
+
+// Prepare step for FullyConnected: derives the 2-D output shape {batch, units} from the
+// input and weight tensors and makes sure the output operand is allocated.
+// All shape consistency checks are assert()s, so they are inactive when assert() is
+// compiled out.
+void prepareFC(ExecEnv *env, const ir::Operation &node)
+{
+  const auto &inputs = node.getInputs();
+  const auto input_idx = inputs.at(ir::operation::FullyConnected::INPUT);
+  const auto weight_idx = inputs.at(ir::operation::FullyConnected::WEIGHT);
+  const auto bias_idx = inputs.at(ir::operation::FullyConnected::BIAS);
+  const auto output_idx = node.getOutputs().at(0);
+
+  const auto input = env->tensorAt(input_idx);
+  const auto weight = env->tensorAt(weight_idx);
+  const auto bias = env->tensorAt(bias_idx);
+
+  UNUSED_RELEASE(input);
+  UNUSED_RELEASE(weight);
+  UNUSED_RELEASE(bias);
+
+  assert(input->num_dimensions() >= 2);
+  assert(weight->num_dimensions() == 2);
+  assert(bias->num_dimensions() == 1);
+
+  // Weight is {units, input size}; the batch count is whatever remains of the input
+  const auto total_input_elements = input->num_elements();
+  const auto units = weight->dimension(0);
+  const auto elements_per_batch = weight->dimension(1);
+  const auto batches = total_input_elements / elements_per_batch;
+  assert(total_input_elements % elements_per_batch == 0);
+  assert(units == bias->dimension(0));
+
+  // Output operand: shape {batches, units}, type info copied from the input
+  ir::Shape result_shape(2);
+  result_shape.dim(0) = batches;
+  result_shape.dim(1) = units;
+  const ir::OperandInfo result_info{result_shape, input->tensorInfo().typeInfo()};
+  env->allocateIfNeeded(output_idx, result_info);
+
+  auto output = env->tensorAt(output_idx);
+  UNUSED_RELEASE(output);
+
+  // Input and output must share the data type, and the output must be {batches, units}
+  assert(input->data_type() == output->data_type());
+  assert(output->num_dimensions() == 2);
+  assert(output->dimension(0) == batches);
+  assert(output->dimension(1) == units);
+}
+
+// Runs the float FullyConnected kernel from cker on the given tensors.
+// All buffers are reinterpreted as float; the caller dispatches on the data type.
+// Shapes are converted with convertExtendShape before being handed to cker.
+void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
+            const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param &param)
+{
+  const float *input_data = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
+  const float *weight_data = reinterpret_cast<const float *>(ker_tensor->bufferRO());
+  const float *bias_data = reinterpret_cast<const float *>(bias_tensor->bufferRO());
+  float *output_data = reinterpret_cast<float *>(ofm_tensor->buffer());
+
+  // Fused activation is expressed as a clamp range in the cker parameters
+  nnfw::cker::FullyConnectedParams fc_params;
+  calculateActivationRange(param.activation, &fc_params.float_activation_min,
+                           &fc_params.float_activation_max);
+
+  const auto input_shape = convertExtendShape(ifm_tensor->tensorInfo().shape());
+  const auto weight_shape = convertExtendShape(ker_tensor->tensorInfo().shape());
+  const auto bias_shape = convertExtendShape(bias_tensor->tensorInfo().shape());
+  const auto output_shape = convertExtendShape(ofm_tensor->tensorInfo().shape());
+
+  nnfw::cker::FullyConnected(fc_params, input_shape, input_data, weight_shape, weight_data,
+                             bias_shape, bias_data, output_shape, output_data);
+}
+
+// Invoke step for FullyConnected: resolves the operand tensors and runs the float kernel.
+// Throws std::runtime_error when the input tensor is not FLOAT32.
+void invokeFC(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &fc_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
+
+  const auto &inputs = node.getInputs();
+  const auto input_idx = inputs.at(ir::operation::FullyConnected::INPUT);
+  const auto weight_idx = inputs.at(ir::operation::FullyConnected::WEIGHT);
+  const auto bias_idx = inputs.at(ir::operation::FullyConnected::BIAS);
+  const auto output_idx = node.getOutputs().at(0);
+
+  const auto input = env->tensorAt(input_idx);
+  const auto weight = env->tensorAt(weight_idx);
+  const auto bias = env->tensorAt(bias_idx);
+  const auto output = env->tensorAt(output_idx);
+
+  // Only the float path exists so far
+  if (input->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"NYI: Support float only"};
+  }
+
+  invoke(input, weight, bias, output, fc_node.param());
+}
+} // namespace fc
+
+// Returns the interpreter kernel entry for FullyConnected. The entry is initialised from
+// fc::prepareFC and fc::invokeFC and lives in a function-local static, so every call hands
+// back a pointer to the same object.
+OpKernel *getFullyConnected()
+{
+ static OpKernel kernel = {fc::prepareFC, fc::invokeFC};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Gather.cc b/runtime/neurun/core/src/exec/interp/operations/Gather.cc
new file mode 100644
index 000000000..8b64d1937
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/Gather.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Gather.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/Gather.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Prepare step for Gather: allocates the output operand from the shape stored in the graph
+// and validates the operand combination.
+// Throws std::runtime_error when
+//  - the graph carries no output shape (total size 0),
+//  - the indices tensor is not INT32,
+//  - the output rank is not input rank + indices rank - 1,
+//  - input and output data types differ, or
+//  - the input is QUANT8_ASYMM and input/output type info differ.
+void prepareGather(ExecEnv *env, const ir::Operation &node)
+{
+  const auto &inputs = node.getInputs();
+  const auto in_idx = inputs.at(ir::operation::Gather::INPUT);
+  const auto idx_idx = inputs.at(ir::operation::Gather::INDICES);
+  const auto out_idx = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_idx);
+  const auto idx_tensor = env->tensorAt(idx_idx);
+
+  // TODO handle unspecified output shape:
+  //      infer it from the input shape, the indices shape and the axis
+  const auto out_info = env->graph().operands().at(out_idx).info();
+  if (out_info.total_size() == 0)
+  {
+    throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"};
+  }
+  env->allocateIfNeeded(out_idx, out_info);
+
+  if (idx_tensor->data_type() != ir::DataType::INT32)
+  {
+    throw std::runtime_error{"Interp(Gather): Invalid indices data type"};
+  }
+
+  auto out_tensor = env->tensorAt(out_idx);
+  auto expected_rank = in_tensor->num_dimensions() + idx_tensor->num_dimensions() - 1;
+
+  if (expected_rank != out_tensor->num_dimensions())
+  {
+    throw std::runtime_error{"Interp(Gather): Invalid output rank"};
+  }
+  if (out_tensor->data_type() != in_tensor->data_type())
+  {
+    throw std::runtime_error{"Interp(Gather): Invalid output data type"};
+  }
+
+  // Quantized data is copied as-is, so both sides must agree on the type info
+  const bool quantized_input = (in_tensor->data_type() == ir::DataType::QUANT8_ASYMM);
+  if (quantized_input &&
+      in_tensor->tensorInfo().typeInfo() != out_tensor->tensorInfo().typeInfo())
+  {
+    throw std::runtime_error{
+        "Interp(Gather): Cannot handle different I/O QUANT8_ASYMM scale/offset"};
+  }
+}
+
+// Runs the cker Gather kernel for element type raw_type.
+// The input and output buffers are reinterpreted as raw_type and the indices buffer as
+// int32_t. axis is narrowed to int8_t when stored in the cker parameters.
+template <typename raw_type>
+void invoke(const ITensor *input_tensors, const ITensor *indices_tensors,
+            const ITensor *output_tensor, uint32_t axis)
+{
+  nnfw::cker::GatherParams gather_params;
+  gather_params.axis = static_cast<int8_t>(axis);
+
+  const auto in_shape = convertShape(input_tensors->tensorInfo().shape());
+  const auto idx_shape = convertShape(indices_tensors->tensorInfo().shape());
+  const auto out_shape = convertShape(output_tensor->tensorInfo().shape());
+
+  const auto *in_data = reinterpret_cast<const raw_type *>(input_tensors->bufferRO());
+  const auto *idx_data = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO());
+  auto *out_data = reinterpret_cast<raw_type *>(output_tensor->buffer());
+
+  nnfw::cker::Gather<raw_type>(gather_params, in_shape, in_data, idx_shape, idx_data, out_shape,
+                               out_data);
+}
+
+// Invoke step for Gather: resolves the operand tensors, normalises the axis parameter and
+// dispatches to the typed kernel wrapper.
+// A negative axis counts from the last dimension of the input. The normalised axis must
+// lie in [0, input rank).
+// Throws std::runtime_error when the axis is out of range or when the input data type is
+// not FLOAT32, INT32 or QUANT8_ASYMM.
+void invokeGather(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node);
+  const int32_t axis_raw = gather_node.param().axis;
+
+  const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
+  const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
+  const auto output_index = node.getOutputs().at(0);
+
+  const auto input_tensor = env->tensorAt(input_index);
+  const auto indices_tensor = env->tensorAt(indices_index);
+  const auto output_tensor = env->tensorAt(output_index);
+
+  // Normalise in signed arithmetic so an out-of-range axis is detected instead of wrapping
+  // around as an unsigned value (and then being narrowed to int8_t by the kernel wrapper).
+  const int32_t input_rank = static_cast<int32_t>(input_tensor->num_dimensions());
+  const int32_t axis_normalized = (axis_raw < 0) ? (axis_raw + input_rank) : axis_raw;
+  if (axis_normalized < 0 || axis_normalized >= input_rank)
+  {
+    throw std::runtime_error{"Interp(Gather): Invalid axis"};
+  }
+  const uint32_t axis = static_cast<uint32_t>(axis_normalized);
+
+  const auto data_type = input_tensor->data_type();
+
+  switch (data_type)
+  {
+    case ir::DataType::FLOAT32:
+      invoke<float>(input_tensor, indices_tensor, output_tensor, axis);
+      break;
+    case ir::DataType::INT32:
+      invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis);
+      break;
+    case ir::DataType::QUANT8_ASYMM:
+      invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis);
+      break;
+    default:
+      throw std::runtime_error{"Interp(Gather): NYI - Not supported type"};
+  }
+}
+
+} // namespace
+
+// Returns the interpreter kernel entry for Gather. The entry is initialised from
+// prepareGather and invokeGather and lives in a function-local static, so every call hands
+// back a pointer to the same object.
+OpKernel *getGather()
+{
+ static OpKernel kernel = {prepareGather, invokeGather};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc
new file mode 100644
index 000000000..d1623d53c
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/InstanceNorm.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/InstanceNorm.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace instancenorm
+{
+
+// Prepare step for InstanceNorm: checks that the input is a 4-D tensor and allocates the
+// output operand with the same tensor info as the input.
+// Throws std::runtime_error when the input is not 4-D. The type/shape agreement of input
+// and output is checked by assert() only.
+void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
+{
+  const auto &norm_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
+
+  const auto ifm_index = node.getInputs().at(norm_node.INPUT);
+  const auto ofm_index = node.getOutputs().at(0);
+  const auto ifm_tensor = env->tensorAt(ifm_index);
+
+  const bool is_4d = (ifm_tensor->num_dimensions() == 4);
+  if (!is_4d)
+  {
+    throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
+  }
+
+  // The output mirrors the input, so its info is taken from the input tensor
+  env->allocateIfNeeded(ofm_index, ifm_tensor->tensorInfo());
+
+  auto ofm_tensor = env->tensorAt(ofm_index);
+  UNUSED_RELEASE(ofm_tensor);
+
+  // Input and output must agree on data type and shape
+  assert(ifm_tensor->data_type() == ofm_tensor->data_type());
+  assert(ifm_tensor->tensorInfo().shape() == ofm_tensor->tensorInfo().shape());
+}
+
+// Stores the given clamp range in the float activation fields of the cker InstanceNorm
+// parameters.
+// NOTE(review): no call to this helper is visible in this file; invoke() below assigns the
+// two fields directly. Confirm whether it is still needed.
+inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params)
+{
+ params->float_activation_min = min;
+ params->float_activation_max = max;
+}
+
+// Runs the float InstanceNorm kernel from cker.
+// All four buffers are reinterpreted as float; the caller dispatches on the data type.
+// epsilon and the fused activation (as a clamp range) come from the operation parameters.
+void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor,
+            const ITensor *output_tensor, const ir::operation::InstanceNorm::Param &param)
+{
+  // Translate the fused activation into a [min, max] clamp range
+  float clamp_min, clamp_max;
+  calculateActivationRange(param.activation, &clamp_min, &clamp_max);
+
+  nnfw::cker::InstanceNormParams norm_params;
+  norm_params.epsilon = param.epsilon;
+  norm_params.float_activation_min = clamp_min;
+  norm_params.float_activation_max = clamp_max;
+
+  const auto in_shape = convertShape(input_tensor->tensorInfo().shape());
+  const auto gamma_shape = convertShape(gamma_tensor->tensorInfo().shape());
+  const auto beta_shape = convertShape(beta_tensor->tensorInfo().shape());
+  const auto out_shape = convertShape(output_tensor->tensorInfo().shape());
+
+  const auto *in_data = reinterpret_cast<const float *>(input_tensor->bufferRO());
+  const auto *gamma_data = reinterpret_cast<const float *>(gamma_tensor->bufferRO());
+  const auto *beta_data = reinterpret_cast<const float *>(beta_tensor->bufferRO());
+  auto *out_data = reinterpret_cast<float *>(output_tensor->buffer());
+
+  nnfw::cker::InstanceNorm(norm_params, in_shape, in_data, gamma_shape, gamma_data, beta_shape,
+                           beta_data, out_shape, out_data);
+}
+
+// Invoke step for InstanceNorm: resolves the input, gamma, beta and output tensors and runs
+// the float kernel.
+// Throws std::runtime_error when the input tensor is not FLOAT32.
+void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &norm_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
+
+  const auto &inputs = node.getInputs();
+  const auto ifm_index = inputs.at(norm_node.INPUT);
+  const auto gamma_index = inputs.at(norm_node.GAMMA);
+  const auto beta_index = inputs.at(norm_node.BETA);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm = env->tensorAt(ifm_index);
+  const auto gamma = env->tensorAt(gamma_index);
+  const auto beta = env->tensorAt(beta_index);
+  const auto ofm = env->tensorAt(ofm_index);
+
+  // Only the float path exists so far
+  if (ifm->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"NYI: Unsupported data type"};
+  }
+
+  invoke(ifm, gamma, beta, ofm, norm_node.param());
+}
+} // namespace instancenorm
+
+// Returns the interpreter kernel entry for InstanceNorm. The entry is initialised from
+// instancenorm::prepareInstanceNorm and instancenorm::invokeInstanceNorm and lives in a
+// function-local static, so every call hands back a pointer to the same object.
+OpKernel *getInstanceNorm()
+{
+ static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Logistic.cc b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc
new file mode 100644
index 000000000..2fc68ffd2
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Logistic.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/Logistic.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Prepare step for Logistic: allocates the output operand from the shape stored in the
+// graph and checks that it has the same data type as the input.
+// Throws std::runtime_error when the graph carries no output shape (total size 0) or when
+// the input and output data types differ.
+void prepareLogistic(ExecEnv *env, const ir::Operation &node)
+{
+  const auto input_index = node.getInputs().at(0);
+  const auto output_index = node.getOutputs().at(0);
+
+  const auto input_tensor = env->tensorAt(input_index);
+
+  const auto output_info = env->graph().operands().at(output_index).info();
+
+  // Output shape inference is not implemented: the graph must already carry the shape.
+  // TODO Util function to compare TensorInfo
+  if (output_info.total_size() == 0)
+  {
+    // The message names this operation (it used to carry the tag of another op)
+    throw std::runtime_error{"Interp(Logistic): NYI unspecified output shape"};
+  }
+  env->allocateIfNeeded(output_index, output_info);
+
+  const auto output_tensor = env->tensorAt(output_index);
+  if (input_tensor->data_type() != output_tensor->data_type())
+  {
+    throw std::runtime_error{"Interp(Logistic): Invalid output type"};
+  }
+}
+
+// Runs the float Logistic kernel from cker on the input tensor and writes the result into
+// the output tensor. Both buffers are reinterpreted as float; the caller dispatches on the
+// data type.
+void invoke(const ITensor *input_tensor, const ITensor *output_tensor)
+{
+  const auto *src = reinterpret_cast<const float *>(input_tensor->bufferRO());
+  auto *dst = reinterpret_cast<float *>(output_tensor->buffer());
+
+  const auto src_shape = convertShape(input_tensor->tensorInfo().shape());
+  const auto dst_shape = convertShape(output_tensor->tensorInfo().shape());
+
+  nnfw::cker::Logistic(src_shape, src, dst_shape, dst);
+}
+
+// Invoke step for Logistic: resolves the input and output tensors and runs the float kernel.
+// Throws std::runtime_error when the input tensor is not FLOAT32.
+void invokeLogistic(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto in_idx = node.getInputs().at(0);
+  const auto out_idx = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_idx);
+  const auto out_tensor = env->tensorAt(out_idx);
+
+  // Only the float path exists so far
+  if (in_tensor->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"};
+  }
+
+  invoke(in_tensor, out_tensor);
+}
+} // namespace
+
+// Returns the interpreter kernel entry for Logistic. The entry is initialised from
+// prepareLogistic and invokeLogistic and lives in a function-local static, so every call
+// hands back a pointer to the same object.
+OpKernel *getLogistic()
+{
+ static OpKernel kernel = {prepareLogistic, invokeLogistic};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc
new file mode 100644
index 000000000..3e1711d8e
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/MaxPool.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/MaxPool2D.h"
+#include "util/Utils.h"
+#include "util/Padding.h"
+#include "util/ShapeInference.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Prepare step for MaxPool2D: allocates the output operand. When the graph carries an
+// output shape it is used as-is; otherwise the shape is inferred from the input shape and
+// the pooling parameters.
+// Rank and data type expectations are checked by assert() only.
+void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node)
+{
+  const auto ifm_index = node.getInputs().at(0);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm = env->tensorAt(ifm_index);
+
+  assert(ifm->num_dimensions() == 4);
+  UNUSED_RELEASE(ifm);
+
+  const auto ofm_info = env->graph().operands().at(ofm_index).info();
+  if (ofm_info.total_size() != 0)
+  {
+    env->allocateIfNeeded(ofm_index, ofm_info);
+  }
+  else
+  {
+    // No shape in the graph: infer it, keeping the type info from the graph
+    const auto &pool_node =
+        nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
+    const auto inferred_shapes =
+        shape_inference::inferMaxPoolShape(ifm->tensorInfo().shape(), pool_node.param());
+    env->allocateIfNeeded(ofm_index, {inferred_shapes[0], ofm_info.typeInfo()});
+  }
+
+  auto ofm = env->tensorAt(ofm_index);
+  UNUSED_RELEASE(ofm);
+
+  // Input and output must share the data type, and the output must be 4-D
+  assert(ifm->data_type() == ofm->data_type());
+  assert(ofm->num_dimensions() == 4);
+}
+
+// Runs the float MaxPool kernel from cker.
+// Padding is computed from the NHWC feature shapes of input and output together with the
+// stride and kernel size; the left and top amounts are passed to cker. Both buffers are
+// reinterpreted as float; the caller dispatches on the data type.
+void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
+            const ir::operation::MaxPool2D::Param &param)
+{
+  // TODO support NCHW frontend
+  const auto ifm_feature = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  const auto ofm_feature = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  const auto pad = neurun::util::calculatePadding(param.padding, ifm_feature, ofm_feature,
+                                                  param.stride, param.kw, param.kh);
+
+  nnfw::cker::PoolParams pool_params;
+  calculateActivationRange(param.activation, &pool_params.float_activation_min,
+                           &pool_params.float_activation_max);
+  pool_params.filter_height = param.kh;
+  pool_params.filter_width = param.kw;
+  pool_params.padding_values.height = pad.top;
+  pool_params.padding_values.width = pad.left;
+  pool_params.stride_height = param.stride.vertical;
+  pool_params.stride_width = param.stride.horizontal;
+
+  const auto src_shape = convertShape(in_tensor->tensorInfo().shape());
+  const auto dst_shape = convertShape(out_tensor->tensorInfo().shape());
+  const auto *src = reinterpret_cast<const float *>(in_tensor->bufferRO());
+  auto *dst = reinterpret_cast<float *>(out_tensor->buffer());
+
+  nnfw::cker::MaxPool(pool_params, src_shape, src, dst_shape, dst);
+}
+
+// Invoke step for MaxPool2D: resolves the input and output tensors and runs the float
+// kernel.
+// Throws std::runtime_error when the input tensor is not FLOAT32.
+void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &pool_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
+
+  const auto ifm_index = node.getInputs().at(0);
+  const auto ofm_index = node.getOutputs().at(0);
+
+  const auto ifm = env->tensorAt(ifm_index);
+  const auto ofm = env->tensorAt(ofm_index);
+
+  // Only the float path exists so far
+  if (ifm->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"NYI: Support float32 only"};
+  }
+
+  invoke(ifm, ofm, pool_node.param());
+}
+} // namespace
+
+// Returns the interpreter kernel entry for MaxPool2D. The entry is initialised from
+// prepareMaxPool2D and invokeMaxPool2D and lives in a function-local static, so every call
+// hands back a pointer to the same object.
+OpKernel *getMaxPool2D()
+{
+ static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h
new file mode 100644
index 000000000..5f4146bb8
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_
+#define __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_
+
+#include "ir/Shape.h"
+#include "ir/InternalType.h"
+
+#include <cker/Shape.h>
+
+#include <cstdint>
+#include <limits>
+#include <stdexcept>
+#include <vector>
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+
+// Converts an ir::Shape into a 4-D cker shape.
+// The source dimensions are copied to the front; positions the source does not provide are
+// filled with 1. Only the first four source dimensions are looked at.
+inline nnfw::cker::Shape convertShape(const ir::Shape &shape)
+{
+  const std::vector<uint32_t> src_dims(shape.dims().begin(), shape.dims().end());
+
+  // Start from all ones so that missing trailing dimensions need no extra handling
+  std::vector<int32_t> raw_shape(4, 1);
+  for (uint32_t axis = 0; axis < 4 && axis < src_dims.size(); ++axis)
+  {
+    raw_shape[axis] = src_dims[axis];
+  }
+
+  return nnfw::cker::GetShape(raw_shape);
+}
+
+// Converts an ir::Shape into a 4-D cker shape by right-aligning the source dimensions:
+// leading positions the source does not provide are filled with 1.
+// Throws std::runtime_error when the source has more than four dimensions. Without this
+// check the unsigned offset computation would wrap around and an all-ones shape would be
+// returned silently.
+inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape)
+{
+  auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
+  if (dimensions.size() > 4)
+  {
+    throw std::runtime_error{"convertExtendShape: shape of rank > 4 is not supported"};
+  }
+
+  // Start from all ones; only the trailing positions are overwritten
+  std::vector<int32_t> raw_shape(4, 1);
+  const uint32_t start = 4 - static_cast<uint32_t>(dimensions.size());
+
+  for (uint32_t i = 0; i < dimensions.size(); ++i)
+  {
+    raw_shape[start + i] = dimensions[i];
+  }
+
+  return nnfw::cker::GetShape(raw_shape);
+}
+
+// Translates a fused activation into the clamp range [*activation_min, *activation_max]
+// for value type T:
+//   RELU  -> [0, max(T)]
+//   RELU6 -> [0, 6]
+//   RELU1 -> [-1, 1]
+//   NONE  -> [lowest(T), max(T)]
+// Throws std::runtime_error for any other activation.
+template <typename T>
+void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+  switch (activation)
+  {
+    case ir::Activation::RELU:
+      *activation_min = 0;
+      *activation_max = std::numeric_limits<T>::max();
+      break;
+    case ir::Activation::RELU6:
+      *activation_min = 0;
+      *activation_max = 6;
+      break;
+    case ir::Activation::RELU1:
+      *activation_min = -1;
+      *activation_max = 1;
+      break;
+    case ir::Activation::NONE:
+      *activation_min = std::numeric_limits<T>::lowest();
+      *activation_max = std::numeric_limits<T>::max();
+      break;
+    default:
+      throw std::runtime_error{"Unsupported activation type"};
+  }
+}
+
+// Computes the shape obtained by broadcasting lhs against rhs.
+//
+// Dimensions are paired starting from the last one. A pair is compatible when both extents
+// are equal or when one of them is 1; the output takes the extent that is not 1. When one
+// shape runs out of dimensions first, the remaining leading dimensions of the other shape
+// are copied as they are. The output rank is the larger of the two ranks.
+//
+// success is an output flag: it is set to true before the walk and to false as soon as an
+// incompatible pair is found. A shape is returned in that case too, but the dimensions not
+// yet visited have not been assigned by this function, so callers should check success
+// before using the result.
+inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success)
+{
+ int lhs_rank = lhs.rank();
+ int rhs_rank = rhs.rank();
+
+ int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank);
+ ir::Shape out_shape(out_rank);
+
+ // Cursors into lhs/rhs, moving from the last dimension towards the first;
+ // -1 means that shape has no dimensions left
+ int lhs_idim = lhs_rank - 1;
+ int rhs_idim = rhs_rank - 1;
+ success = true;
+ for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--)
+ {
+ if (lhs_idim == -1 && rhs_idim == -1)
+ {
+ // invalid result
+ success = false;
+ break;
+ }
+
+ if (lhs_idim == -1)
+ {
+ // lhs exhausted: take the rhs extent unchanged
+ out_shape.dim(out_idim) = rhs.dim(rhs_idim);
+ rhs_idim--;
+ }
+ else if (rhs_idim == -1)
+ {
+ // rhs exhausted: take the lhs extent unchanged
+ out_shape.dim(out_idim) = lhs.dim(lhs_idim);
+ lhs_idim--;
+ }
+ else
+ {
+ // Both shapes still have a dimension at this position
+ if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim))
+ {
+ out_shape.dim(out_idim) = lhs.dim(lhs_idim);
+ lhs_idim--;
+ rhs_idim--;
+ }
+ else if (lhs.dim(lhs_idim) == 1)
+ {
+ // lhs is stretched to the rhs extent
+ out_shape.dim(out_idim) = rhs.dim(rhs_idim);
+ lhs_idim--;
+ rhs_idim--;
+ }
+ else if (rhs.dim(rhs_idim) == 1)
+ {
+ // rhs is stretched to the lhs extent
+ out_shape.dim(out_idim) = lhs.dim(lhs_idim);
+ lhs_idim--;
+ rhs_idim--;
+ }
+ else
+ {
+ // invalid result
+ success = false;
+ break;
+ }
+ }
+ }
+
+ // NOTE(review): with out_rank being the larger rank, both cursors appear to reach -1
+ // whenever the loop runs to completion, so this check looks reachable only after an
+ // early break (where success is already false) - confirm.
+ if (lhs_idim != -1 || rhs_idim != -1)
+ {
+ // invalid result
+ success = false;
+ }
+ return out_shape;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_
diff --git a/runtime/neurun/core/src/exec/interp/operations/Pad.cc b/runtime/neurun/core/src/exec/interp/operations/Pad.cc
new file mode 100644
index 000000000..0c8267a90
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/Pad.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Pad.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/Pad.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Prepare step for Pad: allocates the output operand from the shape stored in the graph
+// and checks that it has the same data type as the input.
+// Throws std::runtime_error when the graph carries no output shape (total size 0) or when
+// the input and output data types differ.
+void preparePad(ExecEnv *env, const ir::Operation &node)
+{
+  const auto in_idx = node.getInputs().at(ir::operation::Pad::INPUT);
+  const auto out_idx = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_idx);
+
+  const auto out_info = env->graph().operands().at(out_idx).info();
+
+  // The output shape is taken from the graph; it is not inferred here
+  // TODO Util function to compare TensorInfo
+  if (out_info.total_size() == 0)
+  {
+    throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
+  }
+  env->allocateIfNeeded(out_idx, out_info);
+
+  const auto out_tensor = env->tensorAt(out_idx);
+  const bool same_type = (in_tensor->data_type() == out_tensor->data_type());
+  if (!same_type)
+  {
+    throw std::runtime_error{"Interp(Pad): Invalid output type"};
+  }
+}
+
+// Runs the float Pad kernel from cker.
+// The input and output buffers are reinterpreted as float and the pad buffer as int32_t.
+// The first dimension of the pad tensor is passed to cker as the pad rank, and the final
+// argument of the kernel call is nullptr.
+void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
+{
+  const auto *src = reinterpret_cast<const float *>(input_tensor->bufferRO());
+  const auto *pads = reinterpret_cast<const int32_t *>(pad_tensor->bufferRO());
+  auto *dst = reinterpret_cast<float *>(output_tensor->buffer());
+
+  const int32_t rank_of_pads = pad_tensor->dimension(0);
+
+  const auto src_shape = convertShape(input_tensor->tensorInfo().shape());
+  const auto dst_shape = convertShape(output_tensor->tensorInfo().shape());
+
+  nnfw::cker::Pad(pads, rank_of_pads, src_shape, src, dst_shape, dst, nullptr);
+}
+
+// Invoke step for Pad: resolves the input, pad and output tensors and runs the float
+// kernel.
+// Throws std::runtime_error when the input tensor is not FLOAT32.
+void invokePad(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &inputs = node.getInputs();
+  const auto in_idx = inputs.at(ir::operation::Pad::INPUT);
+  const auto pad_idx = inputs.at(ir::operation::Pad::PAD);
+  const auto out_idx = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_idx);
+  const auto pads = env->tensorAt(pad_idx);
+  const auto out_tensor = env->tensorAt(out_idx);
+
+  // Only the float path exists so far
+  if (in_tensor->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
+  }
+
+  invoke(in_tensor, pads, out_tensor);
+}
+} // namespace
+
+// Returns the interpreter kernel entry for Pad. The entry is initialised from preparePad
+// and invokePad and lives in a function-local static, so every call hands back a pointer to
+// the same object.
+OpKernel *getPad()
+{
+ static OpKernel kernel = {preparePad, invokePad};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Reshape.cc b/runtime/neurun/core/src/exec/interp/operations/Reshape.cc
new file mode 100644
index 000000000..a160232de
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/Reshape.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/interp/Registration.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Prepare step for Reshape: the output operand is set up through
+// allocateAndShareIfNeeded(), with the input index passed along as the operand to share
+// with. Input and output are expected to have the same total size (assert only).
+void prepare(ExecEnv *env, const ir::Operation &node)
+{
+  const auto src_index = node.getInputs().at(0);
+  const auto dst_index = node.getOutputs().at(0);
+
+  // Unspecified shape is not supported in operation node spec now
+  const auto dst_info = env->graph().operands().at(dst_index).info();
+  env->allocateAndShareIfNeeded(dst_index, dst_info, src_index);
+
+  // A reshape must not change the total size of the operand
+  assert(dst_info.total_size() == env->graph().operands().at(src_index).info().total_size());
+}
+
+// Invoke step for Reshape: copies the input buffer into the output buffer unless both
+// tensors already refer to the same buffer. The number of bytes copied is the total size
+// recorded for the output operand in the graph.
+void invoke(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto src_index = node.getInputs().at(0);
+  const auto dst_index = node.getOutputs().at(0);
+
+  const bool shares_buffer =
+      (env->tensorAt(src_index)->bufferRO() == env->tensorAt(dst_index)->bufferRO());
+  if (!shares_buffer)
+  {
+    const auto dst_info = env->graph().operands().at(dst_index).info();
+    memcpy(env->tensorAt(dst_index)->buffer(), env->tensorAt(src_index)->bufferRO(),
+           dst_info.total_size());
+  }
+  // Otherwise input and output are the same data and nothing has to be done
+}
+
+} // namespace {anonymous}
+
+// Returns the interpreter kernel entry for Reshape. The entry is initialised from this
+// file's prepare and invoke functions and lives in a function-local static, so every call
+// hands back a pointer to the same object.
+OpKernel *getReshape()
+{
+ static OpKernel kernel = {prepare, invoke};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc b/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc
new file mode 100644
index 000000000..91d98889f
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/SoftMax.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/Softmax.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Row-wise softmax over batch_size consecutive rows of input_size floats.
+// For every row the maximum is subtracted before scaling by beta and
+// exponentiating, then the exponentials are normalized by their sum.
+// in and out must each hold batch_size * input_size floats.
+void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta,
+               float *out)
+{
+  assert(input_size > 0);
+
+  const float *row_in = in;
+  float *row_out = out;
+
+  // Walk the rows, moving both cursors one row forward after each batch
+  for (int remaining = batch_size; remaining > 0;
+       --remaining, row_in += input_size, row_out += input_size)
+  {
+    // Largest coefficient of this row
+    float row_max = row_in[0];
+    for (int k = 1; k < input_size; ++k)
+    {
+      row_max = (row_in[k] > row_max) ? row_in[k] : row_max;
+    }
+
+    // Exponentiate relative to the maximum and accumulate the sum
+    float total = 0.0;
+    for (int k = 0; k < input_size; ++k)
+    {
+      const float exponent = std::exp((row_in[k] - row_max) * beta);
+      row_out[k] = exponent;
+      total += exponent;
+    }
+
+    // Normalize with a single reciprocal instead of one division per element
+    const float inv_total = 1.f / total;
+    for (int k = 0; k < input_size; ++k)
+    {
+      row_out[k] *= inv_total;
+    }
+  }
+}
+
+// Prepare step of the SoftMax kernel.
+// Allocates the output operand (if needed) with the input operand's shape and
+// the output operand's own type info, then verifies in debug builds that the
+// allocated output has the same rank and dimensions as the input.
+// Only 2D and 4D inputs are accepted (checked by assert).
+void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
+{
+  const auto in_index = node.getInputs().at(0);
+  const auto out_index = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_index);
+  UNUSED_RELEASE(in_tensor);
+
+  assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2));
+
+  // Output shape should be same with input
+  // Output type is pre-defined in model
+  const auto output_shape = env->graph().operands().at(in_index).info().shape();
+  const auto output_type = env->graph().operands().at(out_index).info().typeInfo();
+
+  const ir::OperandInfo output_info{output_shape, output_type};
+  env->allocateIfNeeded(out_index, output_info);
+
+  const auto out_tensor = env->tensorAt(out_index);
+  // out_tensor is read only inside assert()
+  UNUSED_RELEASE(out_tensor);
+
+  // Check output shape is same with input: the rank comparison must involve
+  // both tensors (it used to compare the output rank with itself)
+  assert(in_tensor->num_dimensions() == out_tensor->num_dimensions());
+  for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++)
+  {
+    assert(in_tensor->dimension(i) == out_tensor->dimension(i));
+  }
+}
+
+// Float softmax dispatch on input rank: rank 2 is handled by the local
+// Softmax2D, rank 4 is forwarded to nnfw::cker::Softmax, and any other rank
+// raises std::runtime_error.
+void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
+            const ir::operation::Softmax::Param &param)
+{
+  const float *src = reinterpret_cast<const float *>(in_tensor->bufferRO());
+  float *dst = reinterpret_cast<float *>(out_tensor->buffer());
+  const float beta = param.beta;
+
+  switch (in_tensor->num_dimensions())
+  {
+    case 2:
+    {
+      // Dimension 0 is used as the batch count, dimension 1 as the row length
+      const uint32_t batch_size = in_tensor->dimension(0);
+      const uint32_t input_size = in_tensor->dimension(1);
+
+      Softmax2D(src, input_size, batch_size, beta, dst);
+      break;
+    }
+    case 4:
+    {
+      const auto src_shape = convertShape(in_tensor->tensorInfo().shape());
+      const auto dst_shape = convertShape(out_tensor->tensorInfo().shape());
+
+      nnfw::cker::SoftmaxParams cker_param;
+      cker_param.beta = beta;
+
+      nnfw::cker::Softmax(cker_param, src_shape, src, dst_shape, dst);
+      break;
+    }
+    default:
+      throw std::runtime_error{"Unsuported input dimension: support 2D or 4D"};
+  }
+}
+
+// Invoke step of the SoftMax kernel: looks up the input/output tensors and
+// runs the float implementation. Throws std::runtime_error unless both
+// tensors are FLOAT32.
+void invokeSoftMax(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node);
+
+  const auto in_index = node.getInputs().at(0);
+  const auto out_index = node.getOutputs().at(0);
+
+  const auto input = env->tensorAt(in_index);
+  const auto output = env->tensorAt(out_index);
+
+  const auto input_type = input->data_type();
+  const auto output_type = output->data_type();
+  const bool both_float32 =
+      (input_type == ir::DataType::FLOAT32) && (output_type == ir::DataType::FLOAT32);
+
+  if (!both_float32)
+  {
+    throw std::runtime_error{"NYI: Support float32 only"};
+  }
+
+  invoke(input, output, softmax_node.param());
+}
+
+} // namespace
+
+// Returns the SoftMax kernel (prepareSoftMax + invokeSoftMax). The kernel
+// object is a function-local static, so every call yields the same address.
+OpKernel *getSoftMax()
+{
+  static OpKernel softmax_kernel{prepareSoftMax, invokeSoftMax};
+  return &softmax_kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc
new file mode 100644
index 000000000..70b72c88d
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/TransposeConv.h>
+#include <misc/polymorphic_downcast.h>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+#include "ir/operation/TransposeConv.h"
+#include "util/Padding.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Prepare step of the TransposeConv kernel.
+// Checks operand ranks in debug builds, allocates the output feature map from
+// the operand info stored in the graph, and throws std::runtime_error when the
+// output shape is unspecified, when the input and output data types differ, or
+// when the output is not rank 4.
+void prepareTransposeConv(ExecEnv *env, const ir::Operation &node)
+{
+  using TConv = ir::operation::TransposeConv;
+
+  const auto in_idx = node.getInputs().at(TConv::INPUT);
+  const auto kernel_idx = node.getInputs().at(TConv::KERNEL);
+  const auto out_shape_idx = node.getInputs().at(TConv::OUTPUT_SHAPE);
+  const auto out_idx = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_idx);
+  const auto kernel_tensor = env->tensorAt(kernel_idx);
+  const auto out_shape_tensor = env->tensorAt(out_shape_idx);
+
+  // Rank expectations: 4D feature map, 4D kernel, 1D output-shape operand
+  assert(in_tensor->num_dimensions() == 4);
+  assert(kernel_tensor->num_dimensions() == 4);
+  assert(out_shape_tensor->num_dimensions() == 1);
+
+  UNUSED_RELEASE(in_tensor);
+  UNUSED_RELEASE(kernel_tensor);
+  UNUSED_RELEASE(out_shape_tensor);
+
+  const auto out_info = env->graph().operands().at(out_idx).info();
+  if (out_info.total_size() == 0)
+  {
+    // TODO: Handle unspecified output shape
+    throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
+  }
+  env->allocateIfNeeded(out_idx, out_info);
+
+  const auto out_tensor = env->tensorAt(out_idx);
+  UNUSED_RELEASE(out_tensor);
+
+  // Handle same ifm & ofm data type only
+  const bool same_type = (in_tensor->data_type() == out_tensor->data_type());
+  if (!same_type)
+  {
+    throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
+  }
+
+  const bool rank_ok = (out_tensor->num_dimensions() == 4);
+  if (!rank_ok)
+  {
+    throw std::runtime_error{"Interp(TConv): Invalid output rank"};
+  }
+}
+
+// Float TransposeConv: derives padding from the operand shapes and the
+// operation parameters, fills nnfw::cker::TransposeConvParams and forwards the
+// raw float buffers to nnfw::cker::TransposeConv. Dilation is fixed to 1.
+void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor,
+            const ir::operation::TransposeConv::Param &param)
+{
+  const auto in_feature = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  const auto out_feature = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+  const auto kernel_dims = ker_tensor->tensorInfo().shape();
+  const auto kernel_h = kernel_dims.dim(1);
+  const auto kernel_w = kernel_dims.dim(2);
+
+  // NOTE(review): the output feature shape is passed before the input feature
+  // shape here - presumably deliberate for a transposed convolution; confirm
+  // against the parameter order of util::calculatePadding.
+  const auto pad = neurun::util::calculatePadding(param.padding, out_feature, in_feature,
+                                                  param.stride, kernel_w, kernel_h);
+
+  nnfw::cker::TransposeConvParams tconv_params;
+  tconv_params.padding_values.width = pad.left;
+  tconv_params.padding_values.height = pad.top;
+  tconv_params.stride_width = param.stride.horizontal;
+  tconv_params.stride_height = param.stride.vertical;
+  tconv_params.dilation_width_factor = 1;
+  tconv_params.dilation_height_factor = 1;
+
+  const auto in_shape = convertShape(ifm_tensor->tensorInfo().shape());
+  const auto kernel_shape = convertShape(ker_tensor->tensorInfo().shape());
+  const auto out_shape = convertShape(ofm_tensor->tensorInfo().shape());
+
+  const float *in_data = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
+  const float *kernel_data = reinterpret_cast<const float *>(ker_tensor->bufferRO());
+  float *out_data = reinterpret_cast<float *>(ofm_tensor->buffer());
+
+  nnfw::cker::TransposeConv(tconv_params, in_shape, in_data, kernel_shape, kernel_data, out_shape,
+                            out_data);
+}
+
+// Invoke step of the TransposeConv kernel: fetches the input, kernel and
+// output tensors and runs the float implementation. Throws std::runtime_error
+// when the input tensor is not FLOAT32.
+void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
+{
+  using TConv = ir::operation::TransposeConv;
+
+  const auto &tconv_node = nnfw::misc::polymorphic_downcast<const TConv &>(node);
+
+  const auto in_idx = node.getInputs().at(TConv::INPUT);
+  const auto kernel_idx = node.getInputs().at(TConv::KERNEL);
+  const auto out_idx = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_idx);
+  const auto kernel_tensor = env->tensorAt(kernel_idx);
+  const auto out_tensor = env->tensorAt(out_idx);
+
+  // Only the input tensor's type selects the implementation
+  if (in_tensor->data_type() != ir::DataType::FLOAT32)
+  {
+    throw std::runtime_error{"Interp(TConv): Support float32 only"};
+  }
+
+  invoke(in_tensor, kernel_tensor, out_tensor, tconv_node.param());
+}
+
+} // namespace
+
+// Returns the TransposeConv kernel (prepareTransposeConv + invokeTransposeConv).
+// The kernel object is a function-local static, so every call yields the same address.
+OpKernel *getTransposeConv()
+{
+  static OpKernel tconv_kernel{prepareTransposeConv, invokeTransposeConv};
+  return &tconv_kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc
new file mode 100644
index 000000000..116806fc4
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cmath>
+
+#include "OperationUtil.h"
+
+#include "exec/interp/Registration.h"
+
+#include "ir/operation/ReLU.h"
+#include "ir/operation/ReLU1.h"
+#include "ir/operation/ReLU6.h"
+#include "ir/operation/Tanh.h"
+
+namespace neurun
+{
+namespace exec
+{
+namespace interp
+{
+namespace
+{
+
+// Compile-time selector for the element-wise activation applied by evalFloat
+enum class ActivationType
+{
+  ReLU,  // max(0, x)
+  ReLU1, // x clamped to [-1, 1]
+  ReLU6, // x clamped to [0, 6]
+  Tanh,  // tanh(x)
+};
+
+// Prepare step shared by the unary activation kernels.
+// Allocates the output operand (if needed) using the operand info stored in
+// the graph, or the input tensor's info when the stored output size is zero,
+// and throws std::runtime_error if input and output data types differ.
+void prepare(ExecEnv *env, const ir::Operation &node)
+{
+  const auto in_idx = node.getInputs().at(0);
+  const auto out_idx = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_idx);
+
+  const auto declared_info = env->graph().operands().at(out_idx).info();
+  const bool shape_unspecified = (declared_info.total_size() == 0);
+
+  // When unspecified, output's shape and type is same with input.
+  // We can handle already allocated (ex. model output)
+  env->allocateIfNeeded(out_idx, shape_unspecified ? in_tensor->tensorInfo() : declared_info);
+
+  const auto out_tensor = env->tensorAt(out_idx);
+
+  // Only the data type is compared here
+  // TODO Util function to compare TensorInfo
+  const bool same_type = (in_tensor->data_type() == out_tensor->data_type());
+  if (!same_type)
+  {
+    throw std::runtime_error{"Interp(Activations): Invalid output type"};
+  }
+}
+
+// Writes fn(x) to output_ptr for each of the num_elements floats starting at
+// input_ptr. The functor type is a template parameter, so no type-erased
+// wrapper sits between the loop and the activation.
+template <typename Fn>
+void mapFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, Fn fn)
+{
+  const float *input_end = input_ptr + num_elements;
+  for (; input_ptr < input_end; ++input_ptr, ++output_ptr)
+  {
+    *output_ptr = fn(*input_ptr);
+  }
+}
+
+// Applies the activation selected by act_type element-wise.
+//   input_ptr    - first of num_elements input floats
+//   output_ptr   - destination for num_elements results
+//   num_elements - number of floats to process
+// Throws std::runtime_error for an activation type without an implementation;
+// the throw happens before any output element is written.
+template <ActivationType act_type>
+void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements)
+{
+  // act_type is a compile-time constant, so each instantiation calls
+  // mapFloat with exactly one of the lambdas below
+  switch (act_type)
+  {
+    case ActivationType::ReLU:
+      mapFloat(input_ptr, output_ptr, num_elements,
+               [](float in) { return std::max(0.f, in); });
+      break;
+    case ActivationType::ReLU1:
+      mapFloat(input_ptr, output_ptr, num_elements,
+               [](float in) { return std::min(std::max(-1.f, in), 1.f); });
+      break;
+    case ActivationType::ReLU6:
+      mapFloat(input_ptr, output_ptr, num_elements,
+               [](float in) { return std::min(std::max(0.f, in), 6.f); });
+      break;
+    case ActivationType::Tanh:
+      mapFloat(input_ptr, output_ptr, num_elements, [](float in) { return std::tanh(in); });
+      break;
+    default:
+      throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"};
+  }
+}
+
+// Invoke step shared by the unary activation kernels (ReLU, ReLU1, ReLU6, Tanh).
+// Runs evalFloat<act_type> over every element of the input tensor and writes
+// the results to the output tensor. Throws std::runtime_error when the input
+// tensor is not FLOAT32.
+template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto input_index = node.getInputs().at(0);
+  const auto output_index = node.getOutputs().at(0);
+
+  const auto input_tensor = env->tensorAt(input_index);
+  const auto output_tensor = env->tensorAt(output_index);
+
+  const auto data_type = input_tensor->data_type();
+  if (data_type == ir::DataType::FLOAT32)
+  {
+    // The element count comes from the input tensor
+    uint64_t elements = input_tensor->num_elements();
+    const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
+    float *out = reinterpret_cast<float *>(output_tensor->buffer());
+
+    evalFloat<act_type>(input_start, out, elements);
+  }
+  else
+  {
+    // This template serves every activation, so the message must not name ReLU6 only
+    throw std::runtime_error{"Interp(Activations): NYI - Support float only"};
+  }
+}
+
+} // namespace
+
+// Returns the ReLU kernel: shared prepare plus invoke instantiated for
+// ActivationType::ReLU. The kernel object is a function-local static.
+OpKernel *getReLU()
+{
+  static OpKernel relu_kernel{prepare, invoke<ActivationType::ReLU>};
+  return &relu_kernel;
+}
+
+// Returns the ReLU1 kernel: shared prepare plus invoke instantiated for
+// ActivationType::ReLU1. The kernel object is a function-local static.
+OpKernel *getReLU1()
+{
+  static OpKernel relu1_kernel{prepare, invoke<ActivationType::ReLU1>};
+  return &relu1_kernel;
+}
+
+// Returns the ReLU6 kernel: shared prepare plus invoke instantiated for
+// ActivationType::ReLU6. The kernel object is a function-local static.
+OpKernel *getReLU6()
+{
+  static OpKernel relu6_kernel{prepare, invoke<ActivationType::ReLU6>};
+  return &relu6_kernel;
+}
+
+// Returns the Tanh kernel: shared prepare plus invoke instantiated for
+// ActivationType::Tanh. The kernel object is a function-local static.
+OpKernel *getTanh()
+{
+  static OpKernel tanh_kernel{prepare, invoke<ActivationType::Tanh>};
+  return &tanh_kernel;
+}
+
+} // namespace interp
+} // namespace exec
+} // namespace neurun