diff options
Diffstat (limited to 'runtime/neurun/core/src/exec/interp')
25 files changed, 3274 insertions, 0 deletions
diff --git a/runtime/neurun/core/src/exec/interp/Buffer.h b/runtime/neurun/core/src/exec/interp/Buffer.h new file mode 100644 index 000000000..d60b59a2f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Buffer.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file Buffer.h + * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class + */ +#ifndef __NEURUN_EXEC_INTERP_BUFFER_H__ +#define __NEURUN_EXEC_INTERP_BUFFER_H__ + +#include <cpp14/memory.h> + +#include "ir/Data.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Interface for writable data area + */ +class Buffer : public ir::Data +{ +public: + /** + * @brief Return writable pointer for data area + * @return Writable pointer + */ + virtual uint8_t *baseWritable(void) const = 0; +}; + +/** + * @brief Class for internally allocated data area + */ +class InternalBuffer final : public Buffer +{ +public: + InternalBuffer(size_t size) : _base{nnfw::cpp14::make_unique<uint8_t[]>(size)}, _size{size} + { + // DO NOTHING + } + +public: + size_t size(void) const override { return _size; } + const uint8_t *base(void) const override { return _base.get(); } + uint8_t *baseWritable(void) const override { return _base.get(); } + +private: + std::unique_ptr<uint8_t[]> _base; + size_t _size; +}; + +/** + * @brief Class for data area from outside 
+ */ +class ExternalBuffer final : public Buffer +{ +public: + ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size} + { + // DO NOTHING + } + +public: + size_t size(void) const override { return _size; } + const uint8_t *base(void) const override { return _base; } + uint8_t *baseWritable(void) const override { return _base; } + +private: + uint8_t *_base; + size_t _size; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_BUFFER_H__ diff --git a/runtime/neurun/core/src/exec/interp/ExecEnv.h b/runtime/neurun/core/src/exec/interp/ExecEnv.h new file mode 100644 index 000000000..0f7d45e2a --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/ExecEnv.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file ExecEnv.h + * @brief This file contains ExecEnv to access interpreter tensor and execution status + */ +#ifndef __NEURUN_EXEC_INTERP_EXEC_ENV_H_ +#define __NEURUN_EXEC_INTERP_EXEC_ENV_H_ + +#include <unordered_set> + +#include "ir/Graph.h" +#include "Tensor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class to gather interpreter execution environment + * Each interpreter instance own execution environment + */ +class ExecEnv +{ +public: + /** + * @brief Construct a new Exec Env object (deleted) + */ + ExecEnv(void) = delete; + /** + * @brief Construct a new ExecEnv object + * @param[in] graph Graph to execute by interpreter + */ + explicit ExecEnv(const ir::Graph &graph) : _graph(graph) + { + // DO NOTHING + } + +public: + /** + * @brief Return graph to execute + * @return Graph + */ + const ir::Graph &graph(void) const { return _graph; } + /** + * @brief Assign tensor to environment which have allocated or assigned buffer + * @param[in] index Tensor index + * @param[in] tensor Tensor + */ + void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor) + { + assert(tensor->bufferRO() != nullptr); + _tensors.emplace(index, tensor); + } + + /** + * @brief Return tensor pointer in environment + * @param[in] index Tensor index + * @return Tensor pointer + */ + const ITensor *tensorAt(const ir::OperandIndex index) const { return _tensors.at(index).get(); } + + /** + * @brief Check environment contains tensor + * @param[in] index Tensor index + * @return @c true if environment contain tensor, otherwise @c false + */ + bool contains(const ir::OperandIndex index) const + { + return (_tensors.find(index) != _tensors.end()); + } + + /** + * @brief Allocate tensor using operand info + * @param[in] index Tensor index + * @param[in] info Operand info + * @note If already allocated, just return + * @TODO More smart allocation policy + */ + void allocateIfNeeded(const ir::OperandIndex index, const 
ir::OperandInfo &info) + { + // already allocated, or constant + if (contains(index)) + { + return; + } + + auto tensor = std::make_shared<Tensor>(info); + tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size())); + assignTensor(index, tensor); + _buffers.insert(index); + } + + /** + * @brief Allocate read-only tensor and share data with other tensor + * @param[in] index Tensor index + * @param[in] info Operand info + * @param[in] index_to_share Tensor index that have data to share + */ + void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info, + const ir::OperandIndex index_to_share) + { + if (!contains(index_to_share)) + { + throw std::runtime_error{"Cannot find tensor to share data"}; + } + + // already allocated + if (contains(index)) + { + return; + } + else + { + auto tensor = std::make_shared<ROTensor>(info); + tensor->setData(tensorAt(index_to_share)->shareData()); + assignTensor(index, tensor); + _buffers.insert(index); + } + } + + /** + * @brief Free buffer if allocated by allocateIfNeed + * @param[in] index Tensor index + * @note If allocated by outside, just return + */ + void freeIfAllocated(const ir::OperandIndex index) + { + if (_buffers.find(index) != _buffers.end()) + { + _tensors.at(index)->releaseData(); + } + } + +private: + const ir::Graph &_graph; + // Tensor map to use in interpreter + // It should map tensors that have allocated or assigned buffer pointer + std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors; + // Tensors allocated by allocateIfNeed (buffer) + std::unordered_set<ir::OperandIndex> _buffers; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_EXEC_ENV_H_ diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.cc b/runtime/neurun/core/src/exec/interp/ExecManager.cc new file mode 100644 index 000000000..92f182c06 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/ExecManager.cc @@ -0,0 +1,125 @@ 
+/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ExecManager.h" +#include "ExecEnv.h" +#include "Interpreter.h" + +#include "util/logging.h" + +#include <cpp14/memory.h> + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +void ExecManager::execute(const IODescription &desc) +{ + /************************************************************************ + * Prepare execution model (submodel) + It may execute divided model + but now consider model inference is done at interpreter + ***********************************************************************/ + ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map; + + for (uint32_t n = 0; n < _graph.getInputs().size(); n++) + { + ir::IOIndex index{n}; + const auto input_index = _graph.getInputs().at(index); + const auto &input = *desc.inputs.at(n); + + auto input_tensor = std::make_shared<ROTensor>(input.info); + input_tensor->setData(std::make_shared<const ir::ExternalData>( + reinterpret_cast<const uint8_t *>(input.buffer), input.size)); + tensor_map[input_index] = input_tensor; + } + + for (uint32_t n = 0; n < _graph.getOutputs().size(); n++) + { + ir::IOIndex index{n}; + const auto output_index = _graph.getOutputs().at(index); + const auto &output = *desc.outputs.at(n); + + auto output_tensor = std::make_shared<Tensor>(output.info); + output_tensor->setBuffer( + 
std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output.buffer), output.size)); + tensor_map[output_index] = output_tensor; + } + + /************************************************************************ + * Prepare execution environment + Execution environment will be assigned to invoked interpreter instance + ***********************************************************************/ + + std::unique_ptr<ExecEnv> interp_env = nnfw::cpp14::make_unique<ExecEnv>(_graph); + + // Assign input tensor into interpreter execution environment + for (auto index : _graph.getInputs()) + { + if (tensor_map.find(index) != tensor_map.end()) + { + VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl; + interp_env->assignTensor(index, tensor_map.at(index)); + } + } + + // Assign output tensor into interpreter execution environment + for (auto index : _graph.getOutputs()) + { + if (tensor_map.find(index) != tensor_map.end()) + { + VERBOSE(INTERPRETER) << "Assign output tensor. operand index: " << index.value() << std::endl; + interp_env->assignTensor(index, tensor_map.at(index)); + } + } + + // Allocate constant tensor + _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + if (obj.isConstant()) + { + VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. 
operand index:" << ind.value() + << std::endl; + + auto const_tensor = std::make_shared<ROTensor>(obj.info()); + // Assume that interpreter's tensor layout is same with model (NHWC) + const_tensor->setData( + std::make_shared<ir::ExternalData>(obj.data().base(), obj.info().total_size())); + interp_env->assignTensor(ind, const_tensor); + } + }); + + /***************************************************************************** + * Invoke interpreter + ****************************************************************************/ + + Interpreter interp(std::move(interp_env)); + interp.run(); + + /***************************************************************************** + * Invoked interpreter run is finished + ****************************************************************************/ + + // If interpreter execute submodel + // 1. Get tensor output of submodel into tensor_map to save result + // 2. Generate new ExecEnv for next interpretation +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.h b/runtime/neurun/core/src/exec/interp/ExecManager.h new file mode 100644 index 000000000..f952abf02 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/ExecManager.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file ExecManager.h + * @brief This file contains ExecManager class\n + * to manage interpreter execution and environment + */ +#ifndef __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ +#define __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ + +#include "ir/Graph.h" +#include "exec/IExecutor.h" +#include "Tensor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class to execute model using interpreter + */ +class ExecManager final : public IExecutor +{ +public: + explicit ExecManager(const ir::Graph &graph) : _graph(graph) + { + // DO NOTHING + } + +public: + /** + * @brief Return graph object + * @return Graph object + */ + const ir::Graph &graph() final { return _graph; } + void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{ + // Not implemented + }; + /** + * @brief Start execution + * @note It should be called after setting input and output buffer + */ + void execute(const IODescription &desc) final; + +private: + const ir::Graph &_graph; + ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_ diff --git a/runtime/neurun/core/src/exec/interp/Interpreter.cc b/runtime/neurun/core/src/exec/interp/Interpreter.cc new file mode 100644 index 000000000..8373419f6 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Interpreter.cc @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Interpreter.h" + +#include <stack> +#include <unordered_set> + +#include "Registration.h" + +#include "ir/OperandIndexMap.h" +#include "util/logging.h" +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +// TODO more structured execution kernel implementation +// TODO use cker for execution +// TODO divide tensor prepare and execution +// TODO introduce memory manager (buffer allocate and free) +class OperationExecutor : ir::OperationVisitor +{ +public: + OperationExecutor(ExecEnv *env) : _env{env} + { + _kernels[ir::OpCode::Add] = getAdd(); + _kernels[ir::OpCode::Sub] = getSub(); + _kernels[ir::OpCode::Mul] = getMul(); + _kernels[ir::OpCode::Conv2D] = getConv2D(); + _kernels[ir::OpCode::MaxPool2D] = getMaxPool2D(); + _kernels[ir::OpCode::Concat] = getConcat(); + _kernels[ir::OpCode::Gather] = getGather(); + _kernels[ir::OpCode::AvgPool2D] = getAvgPool2D(); + _kernels[ir::OpCode::FullyConnected] = getFullyConnected(); + _kernels[ir::OpCode::InstanceNorm] = getInstanceNorm(); + _kernels[ir::OpCode::Softmax] = getSoftMax(); + _kernels[ir::OpCode::Reshape] = getReshape(); + _kernels[ir::OpCode::DepthwiseConv2D] = getDepthwiseConv(); + _kernels[ir::OpCode::TransposeConv] = getTransposeConv(); + _kernels[ir::OpCode::Logistic] = getLogistic(); + _kernels[ir::OpCode::Pad] = getPad(); + _kernels[ir::OpCode::ReLU] = getReLU(); + _kernels[ir::OpCode::ReLU1] = getReLU1(); + _kernels[ir::OpCode::ReLU6] = getReLU6(); + _kernels[ir::OpCode::Tanh] = getTanh(); + } + + void execute(const ir::OperationIndex &idx) + { + const auto nodeName = _env->graph().operations().at(idx).name(); + VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName + << " operation (id: " << idx.value() << ")" << std::endl; + _env->graph().operations().at(idx).accept(*this); + } + +private: +#define OP(InternalName) \ 
+ void visit(const ir::operation::InternalName &node) override \ + { \ + if (_kernels[ir::OpCode::InternalName]->prepare != nullptr) \ + { \ + _kernels[ir::OpCode::InternalName]->prepare(_env, node); \ + } \ + _kernels[ir::OpCode::InternalName]->invoke(_env, node); \ + } +#include "ir/Operations.lst" +#undef OP + +private: + ExecEnv *_env; + std::unordered_map<ir::OpCode, OpKernel *> _kernels; +}; + +void Interpreter::run() +{ + VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl; + + // operand_stack: save operands prepared to use + std::stack<ir::OperandIndex> operand_stack; + + // Note: We should push input first, then constant. + // We use use-def for find operators ready to execution, + // but Use-Def cannot handle parameters (maybe constant, but not always) + // Note: If all model inputs are constant, it may not work (depend on tensors' order). + // But that scenario may not exist + for (auto ind : _env->graph().getInputs()) + { + VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl; + + operand_stack.push(ind); + } + + _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + if (obj.isConstant()) + { + VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl; + + operand_stack.push(ind); + } + }); + + // Execution + std::unordered_set<ir::OperandIndex> ready_check; + std::unordered_set<ir::OperationIndex> executed; + OperationExecutor executor{_env.get()}; + while (!operand_stack.empty()) + { + const auto current_operand_index = operand_stack.top(); + operand_stack.pop(); + VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value() + << " is checked ready to use" << std::endl; + + assert(ready_check.find(current_operand_index) == ready_check.end()); + ready_check.insert(current_operand_index); + + // Find prepared operations by scan use of current operand + std::stack<ir::OperationIndex> operation_stack; + auto use_operators = 
std::list<ir::OperationIndex>( + _env->graph().operands().at(current_operand_index).getUses().list()); + // Remove operation index duplication + // If one operation uses same operand tensor for multiple input, + // use-list have duplicated operation index + use_operators.unique(); + for (auto use_operator : use_operators) + { + // Assumption: all parameters are ready to use + bool operator_ready = true; + for (auto input_index : _env->graph().operations().at(use_operator).getInputs()) + { + if (ready_check.find(input_index) == ready_check.end()) + { + operator_ready = false; + break; + } + } + + if (operator_ready) + { + VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl; + operation_stack.push(use_operator); + } + } + + while (!operation_stack.empty()) + { + const auto current_operation_index = operation_stack.top(); + operation_stack.pop(); + VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "(" + << _env->graph().operations().at(current_operation_index).name() << ")" + << std::endl; + + // execution + // 1. Prepare output tensor + // 2. Call operation kernel + executor.execute(current_operation_index); + executed.insert(current_operation_index); + + // 3. Push each output into operand stack + const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs(); + for (auto def_operand : def_operands) + { + VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value() + << std::endl; + operand_stack.push(def_operand); + } + + // 4. 
Free if lifetime of buffer operands used by input is finished + for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs()) + { + const auto use_operators = _env->graph().operands().at(input_index).getUses(); + bool dead_buffer = true; + for (auto use_operator : use_operators.list()) + { + if (executed.find(use_operator) == executed.end()) + { + dead_buffer = false; + break; + } + } + + if (dead_buffer) + { + _env->freeIfAllocated(input_index); + } + } + } + } +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/Interpreter.h b/runtime/neurun/core/src/exec/interp/Interpreter.h new file mode 100644 index 000000000..1b73592b3 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Interpreter.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Interpreter.h + * @brief This file contains Interpreter class for interpretation + */ +#ifndef __NEURUN_EXEC_INTERP_INTERPRETER_H__ +#define __NEURUN_EXEC_INTERP_INTERPRETER_H__ + +#include "ExecEnv.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class for interpretation + */ +class Interpreter +{ + +public: + /** + * @brief Construct a new Interpreter object (deleted) + */ + Interpreter() = delete; + /** + * @brief Construct a new Interpreter object + * @param[in] env Execution environment variable for interpreter object + */ + Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)} + { + // DO NOTHING + } + +public: + /** + * @brief Run interpreter until there is no operation to execute + */ + void run(); + +private: + std::unique_ptr<ExecEnv> _env; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_INTERPRETER_H__ diff --git a/runtime/neurun/core/src/exec/interp/Registration.h b/runtime/neurun/core/src/exec/interp/Registration.h new file mode 100644 index 000000000..3ebe3bc9f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Registration.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_INTERP_REGISTRATION_H__ +#define __NEURUN_EXEC_INTERP_REGISTRATION_H__ + +#include "ExecEnv.h" + +#include "ir/Operation.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +struct OpKernel +{ + std::function<void(ExecEnv *, const ir::Operation &)> prepare; + std::function<void(const ExecEnv *, const ir::Operation &)> invoke; +}; + +// Defined in operations/ directory +OpKernel *getAdd(); +OpKernel *getSub(); +OpKernel *getMul(); +OpKernel *getConv2D(); +OpKernel *getMaxPool2D(); +OpKernel *getConcat(); +OpKernel *getGather(); +OpKernel *getAvgPool2D(); +OpKernel *getFullyConnected(); +OpKernel *getInstanceNorm(); +OpKernel *getSoftMax(); +OpKernel *getDepthwiseConv(); +OpKernel *getReshape(); +OpKernel *getTransposeConv(); +OpKernel *getLogistic(); +OpKernel *getPad(); +OpKernel *getReLU(); +OpKernel *getReLU1(); +OpKernel *getReLU6(); +OpKernel *getTanh(); + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_REGISTRATION_H__ diff --git a/runtime/neurun/core/src/exec/interp/Tensor.cc b/runtime/neurun/core/src/exec/interp/Tensor.cc new file mode 100644 index 000000000..5c1da3587 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Tensor.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Tensor.h" + +#define NO_USE(a) (void)(a) + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +void ITensor::access(const std::function<void(backend::operand::ITensor &tensor)> &fn) +{ + fn(*this); +} + +size_t ROTensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + NO_USE(coords); + throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); +} + +size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + NO_USE(coords); + throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); +} + +ir::Layout ROTensor::layout() const +{ + // TODO Changes to return frontend layout + return ir::Layout::NHWC; +} + +ir::Layout Tensor::layout() const +{ + // TODO Changes to return frontend layout + return ir::Layout::NHWC; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/Tensor.h b/runtime/neurun/core/src/exec/interp/Tensor.h new file mode 100644 index 000000000..c53fd46a6 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Tensor.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Tensor.h + * @brief This file contains ITensor interface, ROTensor class, and Tensor class + */ +#ifndef __NEURUN_EXEC_INTERP_TENSOR_H__ +#define __NEURUN_EXEC_INTERP_TENSOR_H__ + +#include "Buffer.h" + +#include "ir/OperandInfo.h" +#include "backend/operand/ITensor.h" +#include "ir/Layout.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Interface to handle Tensor in interpreter + */ +class ITensor : public backend::operand::ITensor +{ +public: + virtual ~ITensor() = default; + +public: + virtual uint8_t *buffer() const = 0; + /** + * @brief Return shared pointer for buffer + * @return Buffer shared pointer + */ + virtual std::shared_ptr<const Buffer> shareBuffer() const = 0; + /** + * @brief Return read-only buffer pointer + * @return Read-only buffer pointer + */ + virtual const uint8_t *bufferRO() const = 0; + /** + * @brief Return shared pointer for data + * @return Data shared pointer + */ + virtual std::shared_ptr<const ir::Data> shareData() const = 0; + /** + * @brief Set internal/external buffer + * @param[in] buffer Buffer pointer + */ + virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0; + /** + * @brief Set data reference (including constant, input) + * @param[in] data Data pointer + */ + virtual void setData(std::shared_ptr<const ir::Data> data) = 0; + virtual void releaseData() = 0; + + virtual size_t total_size() const = 0; + virtual size_t dimension(size_t index) const = 0; + virtual size_t num_dimensions() const = 0; + virtual size_t calcOffset(const util::Coordinates &coords) const = 0; + + virtual bool has_padding() const = 0; + /** + * @brief Return data type of tensor + * @return Data type of tensor + */ + virtual ir::DataType data_type() const = 0; + /** + * @brief Return TensorInfo + * @return TensorInfo + */ + virtual const ir::OperandInfo &tensorInfo() const = 0; + /** + * @brief Return number of elements + * @return Number of elements + */ + virtual uint64_t 
num_elements() const = 0; + void access(const std::function<void(backend::operand::ITensor &tensor)> &fn) final; +}; + +/** + * @brief Class to handle tensor in interpreter as read-only + */ +class ROTensor final : public ITensor +{ +public: + ROTensor() = delete; + ROTensor(const ir::OperandInfo &info) : _info(info) + { + // DO NOTHING + } + +public: + uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; } + std::shared_ptr<const Buffer> shareBuffer() const override + { + throw std::runtime_error{"Read only tensor"}; + } + const uint8_t *bufferRO() const override { return _data->base(); } + std::shared_ptr<const ir::Data> shareData() const override { return _data; } + void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; } + void setData(std::shared_ptr<const ir::Data> data) override { _data = data; } + void releaseData() override { _data = nullptr; } + + size_t total_size() const override { return _info.total_size(); } + size_t dimension(size_t index) const override { return _info.shape().dim(index); } + size_t num_dimensions() const override { return _info.shape().rank(); } + size_t calcOffset(const util::Coordinates &coords) const override; + ir::Layout layout() const override; + bool has_padding() const override { return false; } + ir::DataType data_type() const override { return _info.typeInfo().type(); } + const ir::OperandInfo &tensorInfo() const override { return _info; } + uint64_t num_elements() const override { return _info.shape().num_elements(); }; + +private: + const ir::OperandInfo _info; + std::shared_ptr<const ir::Data> _data{nullptr}; +}; + +/** + * @brief Class to handle tensor in interpreter as writable + */ +class Tensor final : public ITensor +{ +public: + Tensor() = delete; + Tensor(const ir::OperandInfo &info) : _info(info) + { + // DO NOTHING + } + +public: + uint8_t *buffer() const override { return _buffer->baseWritable(); } + std::shared_ptr<const Buffer> shareBuffer() const override 
{ return _buffer; }; + const uint8_t *bufferRO() const override { return _buffer->base(); } + std::shared_ptr<const ir::Data> shareData() const override { return _buffer; } + void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; } + void setData(std::shared_ptr<const ir::Data>) override + { + throw std::runtime_error{"Passed data may read-only"}; + } + void releaseData() override { _buffer = nullptr; } + + size_t total_size() const override { return _info.total_size(); } + size_t dimension(size_t index) const override { return _info.shape().dim(index); } + size_t num_dimensions() const override { return _info.shape().rank(); } + size_t calcOffset(const util::Coordinates &coords) const override; + ir::Layout layout() const override; + bool has_padding() const override { return false; } + ir::DataType data_type() const override { return _info.typeInfo().type(); } + const ir::OperandInfo &tensorInfo() const override { return _info; } + uint64_t num_elements() const override { return _info.shape().num_elements(); }; + +private: + const ir::OperandInfo _info; + std::shared_ptr<const Buffer> _buffer{nullptr}; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_TENSOR_H__ diff --git a/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc new file mode 100644 index 000000000..bd396491f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/AveragePool.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/AvgPool2D.h" +#include "util/Utils.h" +#include "util/Padding.h" +#include "util/ShapeInference.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace avgpool2d +{ + +void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + UNUSED_RELEASE(in_tensor); + + assert(in_tensor->num_dimensions() == 4); + + const auto output_info = env->graph().operands().at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output shape + const auto &avgpool_node = + nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node); + const auto infered_output_shapes = + shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param()); + env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *in_tensor, const ITensor *out_tensor, + const ir::operation::AvgPool2D::Param ¶m) +{ + // TODO Support 
NCHW frontend + const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, param.kw, param.kh); + // Calculate + nnfw::cker::PoolParams cker_param; + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + cker_param.filter_width = param.kw; + cker_param.filter_height = param.kh; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + + nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr); +} + +void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node) +{ + const auto &avgpool_node = + nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + // Check lhs shape is same with rhs (with broadcast) + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + const auto data_type = in_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(in_tensor, out_tensor, avgpool_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float only"}; + } +} +} // namespace avgpool2d + +OpKernel *getAvgPool2D() +{ + static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D}; + return &kernel; +} + +} // namespace interp +} 
// namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc new file mode 100644 index 000000000..16469b9db --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/BinaryArithmeticOps.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Add.h" +#include "ir/operation/Sub.h" +#include "ir/operation/Mul.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +enum class OpType +{ + ADD, + SUB, + MUL +}; + +template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node) +{ + const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); + + const auto lhs_index = node.getInputs().at(add_node.LHS); + const auto rhs_index = node.getInputs().at(add_node.RHS); + const auto out_index = node.getOutputs().at(0); + + const auto lhs_tensor = env->tensorAt(lhs_index); + const auto rhs_tensor = env->tensorAt(rhs_index); + + // Check shape and type lhs is same with rhs + // TODO Util function to compare TensorInfo + if (lhs_tensor->data_type() != rhs_tensor->data_type()) + { + throw 
std::runtime_error{"Interp(Add): Different input types"}; + } + + bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()); + if (try_broadcast) + { + bool success = true; + auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(), + rhs_tensor->tensorInfo().shape(), success); + if (!success) + { + throw std::runtime_error{"Interp(Add): Fail to brodcasting"}; + } + + auto output_info = ir::OperandInfo(out_shape, lhs_tensor->tensorInfo().typeInfo()); + // We can handle already allocated (ex. model output) + env->allocateIfNeeded(out_index, output_info); + } + else + { + // Output's shape and type is same with input + auto output_info = lhs_tensor->tensorInfo(); + // We can handle already allocated (ex. model output) + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + // Check shape and type lhs is same with output + // TODO Util function to compare TensorInfo + if (lhs_tensor->data_type() != out_tensor->data_type()) + { + throw std::runtime_error{"Interp(Add): Invalid output type"}; + } +} + +inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params) +{ + params->float_activation_min = min; + params->float_activation_max = max; +} + +inline void setActivationParams(int32_t min, int32_t max, + nnfw::cker::BinaryArithmeticOpParam *params) +{ + params->quantized_activation_min = min; + params->quantized_activation_max = max; +} + +template <typename raw_type, typename param_type, OpType op_type> +void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor, + const param_type ¶m) +{ + const auto lhs_buffer = lhs_tensor->bufferRO(); + const auto rhs_buffer = rhs_tensor->bufferRO(); + auto out_buffer = out_tensor->buffer(); + + nnfw::cker::BinaryArithmeticOpParam cker_param; + raw_type activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + 
setActivationParams(activation_min, activation_max, &cker_param); + const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer); + const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer); + raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer); + + // Calculate + const std::function<raw_type(const raw_type &, const raw_type &)> fn_add = + [](const raw_type &a, const raw_type &b) { return a + b; }; + const std::function<raw_type(const raw_type &, const raw_type &)> fn_sub = + [](const raw_type &a, const raw_type &b) { return a - b; }; + const std::function<raw_type(const raw_type &, const raw_type &)> fn_mul = + [](const raw_type &a, const raw_type &b) { return a * b; }; + + const std::function<raw_type(const raw_type &, const raw_type &)> fn = + (op_type == OpType::ADD) ? fn_add : ((op_type == OpType::SUB) ? fn_sub : fn_mul); + + if (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()) + { + const auto lhs_shape = convertExtendShape(lhs_tensor->tensorInfo().shape()); + const auto rhs_shape = convertExtendShape(rhs_tensor->tensorInfo().shape()); + const auto out_shape = convertExtendShape(out_tensor->tensorInfo().shape()); + nnfw::cker::BroadcastBinaryArithmeticOpSlow(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, + out_shape, out_ptr, fn); + return; + } + + const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape()); + const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + nnfw::cker::BinaryArithmeticOp(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape, + out_ptr, fn); +} + +template <typename node_type, typename param_type, OpType op_type> +void invokeAdd(const ExecEnv *env, const ir::Operation &node) +{ + const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); + + const auto lhs_index = node.getInputs().at(arithmetic_node.LHS); + const auto rhs_index = 
node.getInputs().at(arithmetic_node.RHS); + const auto out_index = node.getOutputs().at(0); + const auto lhs_tensor = env->tensorAt(lhs_index); + const auto rhs_tensor = env->tensorAt(rhs_index); + const auto out_tensor = env->tensorAt(out_index); + const auto data_type = lhs_tensor->data_type(); + + if (data_type == ir::DataType::INT32) + { + invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, + arithmetic_node.param()); + } + else if (data_type == ir::DataType::FLOAT32) + { + invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param()); + } + else + { + throw std::runtime_error{"NYI: Unsupported data type"}; + } +} +} // namespace add + +OpKernel *getAdd() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Add>, + invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>}; + return &kernel; +} + +OpKernel *getSub() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Sub>, + invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>}; + return &kernel; +} + +OpKernel *getMul() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Mul>, + invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Concat.cc b/runtime/neurun/core/src/exec/interp/operations/Concat.cc new file mode 100644 index 000000000..a127e5f30 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Concat.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/Concatenation.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Concat.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace concat +{ + +void prepareConcat(ExecEnv *env, const ir::Operation &node) +{ + const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node); + + const auto first_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto first_tensor = env->tensorAt(first_index); + uint32_t out_axis_dimension = 0; + const int32_t axis_raw = concat_node.param().axis; + const uint32_t axis = (axis_raw < 0) ? 
(axis_raw + first_tensor->num_dimensions()) : axis_raw; + + // All inputs shape should be same except axis dimension + // All inputs type should be same + for (auto input : node.getInputs()) + { + assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions()); + assert(first_tensor->data_type() == env->tensorAt(input)->data_type()); + for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++) + { + if (i == axis) + { + out_axis_dimension += env->tensorAt(input)->dimension(i); + continue; + } + assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i)); + } + } + + // Make output tensor info using first input tensor info, and accumulated axis dimension value + auto out_shape = first_tensor->tensorInfo().shape(); + out_shape.dim(axis) = out_axis_dimension; + env->allocateIfNeeded(out_index, + ir::OperandInfo{out_shape, first_tensor->tensorInfo().typeInfo()}); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Output shape should be same with input except axis dimension + // Output type should be same with input + assert(first_tensor->data_type() == out_tensor->data_type()); + for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++) + { + if (i == axis) + { + continue; + } + assert(first_tensor->dimension(i) == out_tensor->dimension(i)); + } +} + +void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis) +{ + const uint32_t count = in_tensors.size(); + + // Calculate + nnfw::cker::ConcatenationParams cker_param; + cker_param.axis = (int8_t)axis; + cker_param.inputs_count = count; + + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + + std::vector<nnfw::cker::Shape> in_shapes; + std::vector<const nnfw::cker::Shape *> in_shape_ptrs; + in_shapes.reserve(count); + in_shape_ptrs.reserve(count); + std::vector<const float *> in_ptrs; + for (uint32_t i = 0; i < count; i++) + { + 
in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape())); + in_shape_ptrs.push_back(&in_shapes[i]); + in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO())); + } + + auto out_buffer = out_tensor->buffer(); + float *out_ptr = reinterpret_cast<float *>(out_buffer); + + nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape, + out_ptr); +} + +void invokeConcat(const ExecEnv *env, const ir::Operation &node) +{ + const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node); + const int32_t axis_raw = concat_node.param().axis; + + std::vector<const ITensor *> in_tensors; + for (const auto &e : concat_node.getInputs()) + { + in_tensors.emplace_back(env->tensorAt(e)); + } + + const auto out_index = node.getOutputs().at(0); + const auto out_tensor = env->tensorAt(out_index); + const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw; + + const auto data_type = in_tensors[0]->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(in_tensors, out_tensor, axis); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} +} // namespace concat + +OpKernel *getConcat() +{ + static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc new file mode 100644 index 000000000..5242247a4 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/Conv.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Conv2D.h" +#include "util/Utils.h" +#include "util/Padding.h" +#include "util/ShapeInference.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace conv2d +{ + +void prepareConv2D(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT); + const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); + const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + assert(in_tensor->num_dimensions() == 4); + assert(kernel_tensor->num_dimensions() == 4); + assert(bias_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + const auto output_info = env->graph().operands().at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output shape + const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node); + const auto infered_output_shapes = shape_inference::inferConv2DShape( + in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param()); + env->allocateIfNeeded(out_index, 
{infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const ir::operation::Conv2D::Param ¶m) +{ + // TODO Support NCHW frontned + const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = ker_tensor->tensorInfo().shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, ker_width, ker_height); + + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::ConvParams cker_param; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + cker_param.dilation_width_factor = 1; + cker_param.dilation_height_factor = 1; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = 
reinterpret_cast<const float *>(ifm_tensor->bufferRO()); + const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); + const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO()); + float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); + + nnfw::cker::Conv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape, + bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeConv2D(const ExecEnv *env, const ir::Operation &node) +{ + const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); + const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} +} // namespace conv2d + +OpKernel *getConv2D() +{ + static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc b/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc new file mode 100644 index 000000000..1d3649f48 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/DepthwiseConv.h> +#include <misc/polymorphic_downcast.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/DepthwiseConv2D.h" +#include "util/Padding.h" +#include "util/Utils.h" +#include "util/ShapeInference.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +namespace +{ + +void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT); + const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + assert(in_tensor->num_dimensions() == 4); + assert(kernel_tensor->num_dimensions() == 4); + assert(bias_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + // TODO handle unspecified output shape: + // calculate output shape using ifm shape, kernel shape, padding, stride + const auto output_info = env->graph().operands().at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output 
shape + const auto &depth_conv_node = + nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node); + const auto infered_output_shapes = shape_inference::inferDepthwiseConv2DShape( + in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), + depth_conv_node.param()); + env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param ¶m) +{ + // TODO Support NCHW frontend + const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
+ const auto &ker_shape = ker_tensor->tensorInfo().shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, ker_width, ker_height); + + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::DepthwiseConvParams cker_param; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.depth_multiplier = param.multiplier; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + cker_param.dilation_width_factor = 1; + cker_param.dilation_height_factor = 1; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); + const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); + const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO()); + float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); + + nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node) +{ + const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT); + const auto ker_index = 
node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} + +} // namespace + +OpKernel *getDepthwiseConv() +{ + static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc new file mode 100644 index 000000000..9c1c5d4e2 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/FullyConnected.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/FullyConnected.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace fc +{ + +void prepareFC(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT); + const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + assert(in_tensor->num_dimensions() >= 2); + assert(kernel_tensor->num_dimensions() == 2); + assert(bias_tensor->num_dimensions() == 1); + + const auto input_size_with_batch = in_tensor->num_elements(); + const auto num_units = kernel_tensor->dimension(0); + const auto input_size = kernel_tensor->dimension(1); + const auto batch_size = input_size_with_batch / input_size; + assert(input_size_with_batch % input_size == 0); + assert(num_units == bias_tensor->dimension(0)); + + // Make output tensor info + ir::Shape output_shape(2); + output_shape.dim(0) = batch_size; + output_shape.dim(1) = num_units; + const ir::OperandInfo out_info{output_shape, in_tensor->tensorInfo().typeInfo()}; + env->allocateIfNeeded(out_index, out_info); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 2); + assert(out_tensor->dimension(0) == batch_size); + assert(out_tensor->dimension(1) == num_units); +} + +void invoke(const 
ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param ¶m) +{ + const auto ifm_buffer = ifm_tensor->bufferRO(); + const auto ker_buffer = ker_tensor->bufferRO(); + const auto bias_buffer = bias_tensor->bufferRO(); + auto ofm_buffer = ofm_tensor->buffer(); + + // Calculate + nnfw::cker::FullyConnectedParams cker_param; + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + const auto cker_ifm_shape = convertExtendShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertExtendShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertExtendShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertExtendShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer); + const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer); + const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer); + float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer); + + nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeFC(const ExecEnv *env, const ir::Operation &node) +{ + const auto &conv_node = + nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto 
data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float only"}; + } +} +} // namespace fc + +OpKernel *getFullyConnected() +{ + static OpKernel kernel = {fc::prepareFC, fc::invokeFC}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Gather.cc b/runtime/neurun/core/src/exec/interp/operations/Gather.cc new file mode 100644 index 000000000..8b64d1937 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Gather.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/Gather.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Gather.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareGather(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT); + const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto indices_tensor = env->tensorAt(indices_index); + + // TODO handle unspecified output shape: + // calculate output shape using ifm shape, kernel shape, padding, stride + const auto output_info = env->graph().operands().at(output_index).info(); + if (output_info.total_size() == 0) + { + throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"}; + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + if (indices_tensor->data_type() != ir::DataType::INT32) + { + throw std::runtime_error{"Interp(Gather): Invalid indices data type"}; + } + + auto output_tensor = env->tensorAt(output_index); + auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1; + + if (output_rank != output_tensor->num_dimensions()) + { + throw std::runtime_error{"Interp(Gather): Invalid output rank"}; + } + if (output_tensor->data_type() != input_tensor->data_type()) + { + throw std::runtime_error{"Interp(Gather): Invalid output data type"}; + } + + if (input_tensor->data_type() == ir::DataType::QUANT8_ASYMM && + input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo()) + { + throw std::runtime_error{ + "Interp(Gather): Cannot handle different I/O QUANT8_ASYMM scale/offset"}; + } +} + +template <typename raw_type> +void invoke(const ITensor *input_tensors, const ITensor *indices_tensors, + const ITensor 
*output_tensor, uint32_t axis) +{ + // Calculate + nnfw::cker::GatherParams cker_param; + cker_param.axis = (int8_t)axis; + + const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape()); + const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO()); + const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO()); + raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer()); + + nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape, + indices_ptr, cker_output_shape, output_ptr); +} + +void invokeGather(const ExecEnv *env, const ir::Operation &node) +{ + const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node); + const int32_t axis_raw = gather_node.param().axis; + + const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT); + const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto indices_tensor = env->tensorAt(indices_index); + const auto output_tensor = env->tensorAt(output_index); + const uint32_t axis = (axis_raw < 0) ? 
(axis_raw + input_tensor->num_dimensions()) : axis_raw; + + const auto data_type = input_tensor->data_type(); + + switch (data_type) + { + case ir::DataType::FLOAT32: + invoke<float>(input_tensor, indices_tensor, output_tensor, axis); + break; + case ir::DataType::INT32: + invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis); + break; + case ir::DataType::QUANT8_ASYMM: + invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis); + break; + default: + throw std::runtime_error{"Interp(Gather): NYI - Not supported type"}; + } +} + +} // namespace concat + +OpKernel *getGather() +{ + static OpKernel kernel = {prepareGather, invokeGather}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc new file mode 100644 index 000000000..d1623d53c --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/InstanceNorm.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/InstanceNorm.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace instancenorm +{ + +void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node) +{ + const auto &instancenorm_node = + nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node); + + const auto input_index = node.getInputs().at(instancenorm_node.INPUT); + const auto output_index = node.getOutputs().at(0); + const auto input_tensor = env->tensorAt(input_index); + + if (input_tensor->num_dimensions() != 4) + { + throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"}; + } + + // Output shape should be same with input + env->allocateIfNeeded(output_index, input_tensor->tensorInfo()); + + auto output_tensor = env->tensorAt(output_index); + UNUSED_RELEASE(output_tensor); + + // Handle same ifm & ofm data type only + assert(input_tensor->data_type() == output_tensor->data_type()); + assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape()); +} + +inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params) +{ + params->float_activation_min = min; + params->float_activation_max = max; +} + +void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor, + const ITensor *output_tensor, const ir::operation::InstanceNorm::Param ¶m) +{ + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::InstanceNormParams cker_param; + cker_param.epsilon = param.epsilon; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_gamma_shape = 
convertShape(gamma_tensor->tensorInfo().shape()); + const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO()); + const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO()); + const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO()); + float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer()); + + nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr, + cker_beta_shape, beta_ptr, cker_output_shape, output_ptr); +} + +void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node) +{ + const auto &instancenorm_node = + nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node); + + const auto input_index = node.getInputs().at(instancenorm_node.INPUT); + const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA); + const auto beta_index = node.getInputs().at(instancenorm_node.BETA); + const auto out_index = node.getOutputs().at(0); + const auto input_tensor = env->tensorAt(input_index); + const auto gamma_tensor = env->tensorAt(gamma_index); + const auto beta_tensor = env->tensorAt(beta_index); + const auto out_tensor = env->tensorAt(out_index); + const auto data_type = input_tensor->data_type(); + + if (data_type == ir::DataType::FLOAT32) + { + invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param()); + } + else + { + throw std::runtime_error{"NYI: Unsupported data type"}; + } +} +} // namespace instancenorm + +OpKernel *getInstanceNorm() +{ + static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Logistic.cc 
b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc new file mode 100644 index 000000000..2fc68ffd2 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/Logistic.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Logistic.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareLogistic(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + + const auto output_info = env->graph().operands().at(output_index).info(); + + // Check shape and type lhs is same with rhs + // TODO Util function to compare TensorInfo + if (output_info.total_size() == 0) + { + throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + const auto output_tensor = env->tensorAt(output_index); + if (input_tensor->data_type() != output_tensor->data_type()) + { + throw std::runtime_error{"Interp(Logistic): Invalid output type"}; + } +} + +void invoke(const ITensor *input_tensor, const ITensor *output_tensor) +{ + const auto input_buffer = 
input_tensor->bufferRO(); + auto output_buffer = output_tensor->buffer(); + + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const float *input_ptr = reinterpret_cast<const float *>(input_buffer); + float *output_ptr = reinterpret_cast<float *>(output_buffer); + + nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr); +} + +void invokeLogistic(const ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto output_tensor = env->tensorAt(output_index); + + const auto data_type = input_tensor->data_type(); + + if (data_type == ir::DataType::FLOAT32) + { + invoke(input_tensor, output_tensor); + } + else + { + throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"}; + } +} +} // namespace + +OpKernel *getLogistic() +{ + static OpKernel kernel = {prepareLogistic, invokeLogistic}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc new file mode 100644 index 000000000..3e1711d8e --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
#include <cker/operation/MaxPool.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "ir/operation/MaxPool2D.h"
#include "util/Utils.h"
#include "util/Padding.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace
{

// Validate the 4D input and allocate the output tensor, inferring its shape from
// the pooling parameters when the model left it unspecified.
void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node)
{
  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  const auto in_tensor = env->tensorAt(in_index);

  assert(in_tensor->num_dimensions() == 4);
  UNUSED_RELEASE(in_tensor);

  const auto output_info = env->graph().operands().at(out_index).info();
  if (output_info.total_size() == 0)
  {
    // Handle unspecified output shape: infer it from the input shape and pool params
    const auto &maxpool_node =
        nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
    const auto infered_output_shapes =
        shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param());
    env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
  }
  else
  {
    env->allocateIfNeeded(out_index, output_info);
  }

  auto out_tensor = env->tensorAt(out_index);
  UNUSED_RELEASE(out_tensor);

  // Handle same ifm & ofm data type only
  assert(in_tensor->data_type() == out_tensor->data_type());
  assert(out_tensor->num_dimensions() == 4);
}

// Run the float32 cker MaxPool kernel, deriving explicit padding from the
// padding scheme and feature-map shapes first.
void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
            const ir::operation::MaxPool2D::Param &param)
{
  // TODO support NCHW frontend
  const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
                                                      param.stride, param.kw, param.kh);
  // Calculate: fill the cker parameter struct (filter size, padding, stride, clamp range)
  nnfw::cker::PoolParams cker_param;
  calculateActivationRange(param.activation, &cker_param.float_activation_min,
                           &cker_param.float_activation_max);
  cker_param.filter_width = param.kw;
  cker_param.filter_height = param.kh;
  cker_param.padding_values.width = padding.left;
  cker_param.padding_values.height = padding.top;
  cker_param.stride_width = param.stride.horizontal;
  cker_param.stride_height = param.stride.vertical;

  const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
  const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
  float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());

  nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
}

// Dispatch MaxPool2D invocation by element type (float32 only for now).
void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node)
{
  const auto &maxpool_node =
      nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);

  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  const auto in_tensor = env->tensorAt(in_index);
  const auto out_tensor = env->tensorAt(out_index);

  const auto data_type = in_tensor->data_type();
  if (data_type == ir::DataType::FLOAT32)
  {
    invoke(in_tensor, out_tensor, maxpool_node.param());
  }
  else
  {
    throw std::runtime_error{"NYI: Support float32 only"};
  }
}
} // namespace

OpKernel *getMaxPool2D()
{
  static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
a/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h new file mode 100644 index 000000000..5f4146bb8 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ +#define __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ + +#include "ir/Shape.h" +#include "ir/InternalType.h" + +#include <cker/Shape.h> + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +inline nnfw::cker::Shape convertShape(const ir::Shape &shape) +{ + auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); + + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = dimensions[i]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape) +{ + auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); + + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + uint32_t start = 4 - dimensions.size(); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i < start) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = dimensions[i - start]; 
+ } + } + + return nnfw::cker::GetShape(raw_shape); +} + +template <typename T> +void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + throw std::runtime_error{"Unsupported activation type"}; + } +} + +inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success) +{ + int lhs_rank = lhs.rank(); + int rhs_rank = rhs.rank(); + + int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank); + ir::Shape out_shape(out_rank); + + int lhs_idim = lhs_rank - 1; + int rhs_idim = rhs_rank - 1; + success = true; + for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--) + { + if (lhs_idim == -1 && rhs_idim == -1) + { + // invalid result + success = false; + break; + } + + if (lhs_idim == -1) + { + out_shape.dim(out_idim) = rhs.dim(rhs_idim); + rhs_idim--; + } + else if (rhs_idim == -1) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + } + else + { + if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim)) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + rhs_idim--; + } + else if (lhs.dim(lhs_idim) == 1) + { + out_shape.dim(out_idim) = rhs.dim(rhs_idim); + lhs_idim--; + rhs_idim--; + } + else if (rhs.dim(rhs_idim) == 1) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + rhs_idim--; + } + else + { + // invalid result + success = false; + break; + } + } + } + + if (lhs_idim != -1 || rhs_idim != -1) + { + // invalid result + success = false; + } + return out_shape; +} + +} 
// namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ diff --git a/runtime/neurun/core/src/exec/interp/operations/Pad.cc b/runtime/neurun/core/src/exec/interp/operations/Pad.cc new file mode 100644 index 000000000..0c8267a90 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Pad.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#include <cker/operation/Pad.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "ir/operation/Pad.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace
{

// Allocate the output tensor and check it matches the input element type.
void preparePad(ExecEnv *env, const ir::Operation &node)
{
  const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
  const auto output_index = node.getOutputs().at(0);

  const auto input_tensor = env->tensorAt(input_index);

  const auto output_info = env->graph().operands().at(output_index).info();

  // Output shape/type must be fully specified in the model
  // TODO Util function to compare TensorInfo
  if (output_info.total_size() == 0)
  {
    throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
  }
  else
  {
    env->allocateIfNeeded(output_index, output_info);
  }

  const auto output_tensor = env->tensorAt(output_index);
  if (input_tensor->data_type() != output_tensor->data_type())
  {
    throw std::runtime_error{"Interp(Pad): Invalid output type"};
  }
}

// Run the float32 cker Pad kernel; the final nullptr means "pad with zeros".
void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
{
  const auto input_buffer = input_tensor->bufferRO();
  const auto pad_buffer = pad_tensor->bufferRO();
  auto output_buffer = output_tensor->buffer();

  // Pad tensor layout is [rank, 2] (before/after per dimension), so dimension(0)
  // is the rank of the padded input
  int32_t pad_rank = pad_tensor->dimension(0);

  const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
  const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
  const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
  const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
  float *output_ptr = reinterpret_cast<float *>(output_buffer);

  nnfw::cker::Pad(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape, output_ptr,
                  nullptr);
}

// Dispatch Pad invocation by element type (float32 only for now).
void invokePad(const ExecEnv *env, const ir::Operation &node)
{
  const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
  const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD);
  const auto output_index = node.getOutputs().at(0);

  const auto input_tensor = env->tensorAt(input_index);
  const auto pad_tensor = env->tensorAt(pad_index);
  const auto output_tensor = env->tensorAt(output_index);

  const auto data_type = input_tensor->data_type();

  if (data_type == ir::DataType::FLOAT32)
  {
    invoke(input_tensor, pad_tensor, output_tensor);
  }
  else
  {
    throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
  }
}
} // namespace

OpKernel *getPad()
{
  static OpKernel kernel = {preparePad, invokePad};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
#include "exec/interp/Registration.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace
{

// Allocate the output, sharing the input's buffer when possible - Reshape does
// not change element data, only the shape metadata.
void prepare(ExecEnv *env, const ir::Operation &node)
{
  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  // Unspecified shape is not supported in operation node spec now
  const auto output_info = env->graph().operands().at(out_index).info();
  env->allocateAndShareIfNeeded(out_index, output_info, in_index);

  assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size());
}

// Copy input data to output unless the two tensors already share a buffer.
void invoke(const ExecEnv *env, const ir::Operation &node)
{
  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO())
  {
    // Same data: prepare() shared the buffer, nothing to copy
    return;
  }

  // NOTE(review): memcpy appears to rely on a transitive <cstring> include - confirm
  const auto output_info = env->graph().operands().at(out_index).info();
  memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(),
         output_info.total_size());
}

} // namespace

OpKernel *getReshape()
{
  static OpKernel kernel = {prepare, invoke};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/SoftMax.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Softmax.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta, + float *out) +{ + assert(input_size > 0); + + // For each batch + for (int b = 0; b < batch_size; b++) + { + // Find the max coeff. + float max_coeff = in[0]; + for (int i = 1; i < input_size; i++) + { + if (in[i] > max_coeff) + max_coeff = in[i]; + } + + // Compute the normalized sum of exps. + float exp_sum = 0.0; + for (int i = 0; i < input_size; i++) + { + out[i] = std::exp((in[i] - max_coeff) * beta); + exp_sum += out[i]; + } + + // Divide by the sum of exps. + float reciprocal_sum_exp = 1.f / exp_sum; + for (int i = 0; i < input_size; i++) + { + out[i] *= reciprocal_sum_exp; + } + + // Advance in and out pointers for the next batch. 
+ in += input_size; + out += input_size; + } +} + +void prepareSoftMax(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + UNUSED_RELEASE(in_tensor); + + assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2)); + + // Output shape should be same with input + // Output type is pre-defined in model + const auto output_shape = env->graph().operands().at(in_index).info().shape(); + const auto output_type = env->graph().operands().at(out_index).info().typeInfo(); + + const ir::OperandInfo output_info{output_shape, output_type}; + env->allocateIfNeeded(out_index, output_info); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Check output shape is same with input + assert(out_tensor->num_dimensions() == out_tensor->num_dimensions()); + for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++) + { + assert(in_tensor->dimension(i) == out_tensor->dimension(i)); + } +} + +void invoke(const ITensor *in_tensor, const ITensor *out_tensor, + const ir::operation::Softmax::Param ¶m) +{ + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + + float beta = param.beta; + + if (in_tensor->num_dimensions() == 2) + { + uint32_t batch_size = in_tensor->dimension(0); + uint32_t input_size = in_tensor->dimension(1); + + Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr); + } + else if (in_tensor->num_dimensions() == 4) + { + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + + nnfw::cker::SoftmaxParams cker_param; + cker_param.beta = beta; + + nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr); + } + else + { + throw std::runtime_error{"Unsuported input dimension: support 2D or 
4D"}; + } +} + +void invokeSoftMax(const ExecEnv *env, const ir::Operation &node) +{ + const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + const auto in_data_type = in_tensor->data_type(); + const auto out_data_type = out_tensor->data_type(); + if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32)) + { + invoke(in_tensor, out_tensor, softmax_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} + +} // namespace + +OpKernel *getSoftMax() +{ + static OpKernel kernel = {prepareSoftMax, invokeSoftMax}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc new file mode 100644 index 000000000..70b72c88d --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/TransposeConv.h> +#include <misc/polymorphic_downcast.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/TransposeConv.h" +#include "util/Padding.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareTransposeConv(ExecEnv *env, const ir::Operation &node) +{ + const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index); + + assert(ifm_tensor->num_dimensions() == 4); + assert(ker_tensor->num_dimensions() == 4); + assert(ofm_shape_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(ifm_tensor); + UNUSED_RELEASE(ker_tensor); + UNUSED_RELEASE(ofm_shape_tensor); + + const auto output_info = env->graph().operands().at(ofm_index).info(); + if (output_info.total_size() == 0) + { + // TODO: Handle unspecified output shape + throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; + } + else + { + env->allocateIfNeeded(ofm_index, output_info); + } + + auto ofm_tensor = env->tensorAt(ofm_index); + UNUSED_RELEASE(ofm_tensor); + + // Handle same ifm & ofm data type only + if (ifm_tensor->data_type() != ofm_tensor->data_type()) + { + throw std::runtime_error{"Interp(TConv): Different I/O data dype"}; + } + + if (ofm_tensor->num_dimensions() != 4) + { + throw std::runtime_error{"Interp(TConv): Invalid output rank"}; + } +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor, + const ir::operation::TransposeConv::Param ¶m) +{ + const auto ifm_shape = 
ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto ker_shape = ker_tensor->tensorInfo().shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  // Padding is derived from the IR padding mode plus the output/input feature
  // shapes, stride, and kernel extent.
  const auto padding = neurun::util::calculatePadding(param.padding, ofm_shape, ifm_shape,
                                                      param.stride, ker_width, ker_height);

  nnfw::cker::TransposeConvParams cker_param;
  cker_param.padding_values.width = padding.left;
  cker_param.padding_values.height = padding.top;
  cker_param.stride_width = param.stride.horizontal;
  cker_param.stride_height = param.stride.vertical;
  // Dilation is not carried by TransposeConv::Param here, so it is fixed to 1.
  cker_param.dilation_width_factor = 1;
  cker_param.dilation_height_factor = 1;

  const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
  const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
  const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
  // Buffers are reinterpreted as float; the caller guarantees FLOAT32 tensors.
  const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
  const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
  float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());

  nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
                            cker_ofm_shape, ofm_ptr);
}

// Runtime entry for TransposeConv: resolves the tensors and dispatches to the
// float32 implementation above. Only FLOAT32 is supported.
void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
{
  const auto &tconv_node =
      nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);

  const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
  const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
  const auto ofm_index = node.getOutputs().at(0);

  const auto ifm_tensor = env->tensorAt(ifm_index);
  const auto ker_tensor = env->tensorAt(ker_index);
  const auto ofm_tensor = env->tensorAt(ofm_index);

  const
auto data_type = ifm_tensor->data_type();
  if (data_type == ir::DataType::FLOAT32)
  {
    invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param());
  }
  else
  {
    throw std::runtime_error{"Interp(TConv): Support float32 only"};
  }
}

} // namespace

// Returns the (prepare, invoke) kernel pair for TransposeConv.
// The OpKernel is a function-local static, so one shared instance serves all callers.
OpKernel *getTransposeConv()
{
  static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc
new file mode 100644
index 000000000..116806fc4
--- /dev/null
+++ b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc
@@ -0,0 +1,156 @@
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cmath>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"

#include "ir/operation/ReLU.h"
#include "ir/operation/ReLU1.h"
#include "ir/operation/ReLU6.h"
#include "ir/operation/Tanh.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace
{

// Tag used as a compile-time template parameter to select the activation.
enum class ActivationType
{
  ReLU,
  ReLU1,
  ReLU6,
  Tanh
};

// Shared shape-inference/allocation hook for all unary activations.
// The output takes the input's shape/type when the model leaves it
// unspecified; otherwise the model-declared info is used. Only verifies that
// input and output data types match.
void prepare(ExecEnv *env, const ir::Operation &node)
{
  const auto input_index = node.getInputs().at(0);
  const auto output_index = node.getOutputs().at(0);

  const auto input_tensor = env->tensorAt(input_index);

  const auto output_info = env->graph().operands().at(output_index).info();
  if (output_info.total_size() == 0)
  {
    // Output's shape and type is same with input
    auto input_info = input_tensor->tensorInfo();
    // We can handle already allocated (ex. model output)
    env->allocateIfNeeded(output_index, input_info);
  }
  else
  {
    env->allocateIfNeeded(output_index, output_info);
  }

  const auto output_tensor = env->tensorAt(output_index);
  // Check shape and type lhs is same with output
  // TODO Util function to compare TensorInfo
  if (input_tensor->data_type() != output_tensor->data_type())
  {
    throw std::runtime_error{"Interp(Activations): Invalid output type"};
  }
}

// Applies the activation selected by act_type element-wise over a float
// buffer. The switch on the compile-time tag picks the lambda; the initial
// NaN-producing lambda is only a defensive placeholder and is always
// overwritten for the four valid tags.
template <ActivationType act_type>
void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements)
{
  std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
  switch (act_type)
  {
    case ActivationType::ReLU:
      fn = [](const float &in) { return std::max(0.f, in); };
      break;
    case ActivationType::ReLU1:
      fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); };
      break;
    case ActivationType::ReLU6:
      fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); };
      break;
    case ActivationType::Tanh:
      fn = [](const float &in) { return std::tanh(in); };
      break;
    default:
      // Unreachable for the four enumerators above; kept as a guard.
      throw std::runtime_error{"Interp(Activations):
NYI - Unsupported activation"}; + break; + } + + const float *input_end = input_ptr + num_elements; + for (; input_ptr < input_end; input_ptr++, output_ptr++) + { + *output_ptr = fn(*input_ptr); + } +} + +template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + // Check lhs shape is same with rhs (with broadcast) + const auto input_tensor = env->tensorAt(input_index); + const auto output_tensor = env->tensorAt(output_index); + + const auto data_type = input_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + uint64_t elements = input_tensor->num_elements(); + const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO()); + float *out = reinterpret_cast<float *>(output_tensor->buffer()); + + evalFloat<act_type>(input_start, out, elements); + } + else + { + throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"}; + } +} + +} // namespace + +OpKernel *getReLU() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>}; + return &kernel; +} + +OpKernel *getReLU1() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>}; + return &kernel; +} + +OpKernel *getReLU6() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>}; + return &kernel; +} + +OpKernel *getTanh() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun |