diff options
Diffstat (limited to 'runtime/neurun')
559 files changed, 71905 insertions, 0 deletions
diff --git a/runtime/neurun/CMakeLists.txt b/runtime/neurun/CMakeLists.txt new file mode 100644 index 000000000..359006d54 --- /dev/null +++ b/runtime/neurun/CMakeLists.txt @@ -0,0 +1,16 @@ +if(NOT BUILD_NEURUN) + return() +endif(NOT BUILD_NEURUN) + +# Add cpu + +# TODO Remove this variable as adding include dirs is done with target_link_libraries +# (currently used by cpu/acl_cl kernel module which is not proper) +set(NEURUN_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/core/include) + +add_subdirectory(backend) +add_subdirectory(frontend) +add_subdirectory(core) +add_subdirectory(api) +add_subdirectory(test) +add_subdirectory(sample) diff --git a/runtime/neurun/api/CMakeLists.txt b/runtime/neurun/api/CMakeLists.txt new file mode 100644 index 000000000..c3f7702ad --- /dev/null +++ b/runtime/neurun/api/CMakeLists.txt @@ -0,0 +1,21 @@ +file(GLOB_RECURSE API_SRC "*.cc") + +set(NEURUN_DEV nnfw-dev) +add_library(${NEURUN_DEV} SHARED ${API_SRC}) + +# Public headers to publish +# nnfw_debug.h is header for runtime developer, so it will not be installed +# But runtime developer can use nnfw_debug.h by linking nnfw-dev +set(NNFW_API_HEADERS include/nnfw.h include/nnfw_dev.h) + +target_link_libraries(${NEURUN_DEV} PUBLIC nnfw-nnapi-header) +target_link_libraries(${NEURUN_DEV} PRIVATE neurun_core) +target_link_libraries(${NEURUN_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD}) +target_link_libraries(${NEURUN_DEV} PRIVATE nnfw_common) +target_link_libraries(${NEURUN_DEV} PRIVATE nnfw_coverage) +target_include_directories(${NEURUN_DEV} PUBLIC include) +set_target_properties(${NEURUN_DEV} PROPERTIES PUBLIC_HEADER "${NNFW_API_HEADERS}") + +install(TARGETS ${NEURUN_DEV} + LIBRARY DESTINATION lib + PUBLIC_HEADER DESTINATION include/nnfw) diff --git a/runtime/neurun/api/include/nnfw.h b/runtime/neurun/api/include/nnfw.h new file mode 100644 index 000000000..c903fbcad --- /dev/null +++ b/runtime/neurun/api/include/nnfw.h @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2019 Samsung 
Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file nnfw.h + * @brief This file describes runtime API + */ +#ifndef __NNFW_H__ +#define __NNFW_H__ + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Session to query with runtime + * + * <p>nnfw_session is started and passed by calling {@link nnfw_create_session}. + * Each session has its own inference environment, such as model to inference, backend usage, etc. + * + * <p>Load model by calling {@link nnfw_load_model_from_file} + * + * <p>After loading, prepare inference by calling {@link nnfw_prepare}. + * Application can set runtime environment before prepare by calling + * {@link nnfw_set_available_backends} and {@link nnfw_set_op_backend}, and it is optional. + * + * <p>Application can inference by calling {@link nnfw_run}. 
+ * Before inference, application has responsibility to set input tensor to set input data by calling + * {@link nnfw_set_input}, and output tensor to get output by calling {@link nnfw_set_output} + * + * <p>To support input and output setting, application can get + * input and output tensor information by calling<ul> + * <li>{@link nnfw_input_size}</li> + * <li>{@link nnfw_output_size}</li> + * <li>{@link nnfw_input_tensorinfo}</li> + * <li>{@link nnfw_output_tensorinfo}</li> + * </ul> + * + * <p>Application can inference many times using one session, + * but next inference can do after prior inference end + * + * <p>Application cannot use multiple models using one session + */ +typedef struct nnfw_session nnfw_session; + +/** + * @brief Tensor types + * + * The type of tensor represented in {@link nnfw_tensorinfo} + */ +typedef enum { + /** A tensor of 32 bit floating point */ + NNFW_TYPE_TENSOR_FLOAT32 = 0, + /** A tensor of 32 bit signed integer */ + NNFW_TYPE_TENSOR_INT32 = 1, + /** + * A tensor of 8 bit integers that represent real numbers. + * + * real_value = (integer_value - zeroPoint) * scale. + */ + NNFW_TYPE_TENSOR_QUANT8_ASYMM = 2, + /** A tensor of boolean */ + NNFW_TYPE_TENSOR_BOOL = 3, + /** A tensor of 8 bit unsigned integer */ + NNFW_TYPE_TENSOR_UINT8 = 4, +} NNFW_TYPE; + +/** + * @brief Result Values + */ +typedef enum { + /** Successful */ + NNFW_STATUS_NO_ERROR = 0, + /** Failed */ + NNFW_STATUS_ERROR = 1, +} NNFW_STATUS; + +/** + * @brief Data format of a tensor + */ +typedef enum { + /** Don't care layout */ + NNFW_LAYOUT_NONE = 0, + /** + * Channel last layout + * If rank is 4, layout is NHWC + */ + NNFW_LAYOUT_CHANNELS_LAST = 1, + /** + * Channel first layout + * If rank is 4, layout is NCHW + */ + NNFW_LAYOUT_CHANNELS_FIRST = 2, +} NNFW_LAYOUT; + +/** + * @brief tensor info describes the type and shape of tensors + * + * <p>This structure is used to describe input and output tensors. 
+ * Application can get input and output tensor type and shape described in model by using + * {@link nnfw_input_tensorinfo} and {@link nnfw_output_tensorinfo} + * + * <p>Maximum rank is 6. And tensor's dimension value is filled in 'dims' field from index 0. + * For example, if tensor's rank is 4, + * application can get dimension value from dims[0], dims[1], dims[2], and dims[3] + */ +typedef struct nnfw_tensorinfo +{ + /** The data type */ + NNFW_TYPE dtype; + /** The number of dimensions (rank) */ + int32_t rank; + /** + * The dimension of tensor. + * Maximum rank is 6. + */ + int32_t dims[6]; +} nnfw_tensorinfo; + +/** + * @brief Create a new session instance. + * + * <p>This only creates a session. + * Model is loaded after {@link nnfw_load_model_from_file} is invoked. + * And inference is performed after {@link nnfw_run} is invoked. + * + * <p>{@link nnfw_close_session} should be called once + * if session is no longer need + * + * @param[out] session The session to be created + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_create_session(nnfw_session **session); + +/** + * @brief Close a session instance + * + * After called, access to closed session by application will be invalid + * + * @param[in] session The session to be closed + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_close_session(nnfw_session *session); + +/** + * @brief Load model from nnpackage file or directory + * + * @param[in] session nnfw_session loading the given nnpackage file/dir + * @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *package_file_path); + +/** + * @brief Apply i-th input's tensor info to resize input tensor + * + * This function should be called before {@link nnfw_prepare} is invoked, and + * should be called after {@link nnfw_load_model_from_file} is 
invoked + * See {@link nnfw_prepare} for information applying updated tensor info + * If this function is called many times for same index, tensor info is overwritten + * + * @param[in] session Session to the input tensor info is to be set + * @param[in] index Index of input to be applied (0-indexed) + * @param[in] tensor_info Tensor info to be applied + * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR + */ +NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index, + nnfw_tensorinfo tensor_info); + +/** + * @brief Prepare session to be ready for inference + * + * This phase may finalize model compilation, scheduling, and additional settings. + * If {@link nnfw_apply_tensor} is called to apply input tensor info different with model + * before this function, tries to resize all tensors. + * + * @param[in] session the session to be prepared + * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR + */ +NNFW_STATUS nnfw_prepare(nnfw_session *session); + +/** + * @brief Run inference + * + * <p>This function should be called after model is loaded by {@link nnfw_load_model_from_file}, + * session is prepared for inference by {@link nnfw_prepare}, set input and output buffers + * by {@link nnfw_set_input} and {@link nnfw_set_output}.</p> + * + * <p>This function return after inference is finished.</p> + * + * @param[in] session The session to run inference + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_run(nnfw_session *session); + +/** + * @brief Set input buffer + * + * This function should be called after {@link nnfw_prepare}, and before first inference + * on session by {@link nnfw_run}. Application can reuse buffer for many inferences. 
+ * + * @param[in] session Session to the input is to be set + * @param[in] index Index of input to be set (0-indexed) + * @param[in] type Type of the input + * @param[in] buffer Raw buffer for input + * @param[in] length Size of bytes of input buffer + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type, + const void *buffer, size_t length); + +/** + * @brief Set output buffer + * + * This function should be called after {@link nnfw_prepare}, and before first inference + * on session by {@link nnfw_run}. Application can reuse buffer for many inferences. + * + * @param[in] session Session from inference output is to be extracted + * @param[in] index Index of output to be set (0-indexed) + * @param[in] type Type of the output + * @param[out] buffer Raw buffer for output + * @param[in] length Size of bytes of output buffer + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_output(nnfw_session *session, uint32_t index, NNFW_TYPE type, void *buffer, + size_t length); + +/** + * @brief Get the number of inputs + * + * Application can call this function to get number of inputs defined in loaded model. + * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model + * + * @param[in] session Session from input information is to be extracted + * @param[out] number Variable which the number of inputs is put into + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_input_size(nnfw_session *session, uint32_t *number); + +/** + * @brief Get the number of outputs + * + * Application can call this function to get number of outputs defined in loaded model. 
+ * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model + * + * @param[in] session Session from output information is to be extracted + * @param[out] number Variable which the number of outputs is put into + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_output_size(nnfw_session *session, uint32_t *number); + +/** + * @brief Set the layout of an input + * + * The input that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout + * + * @param[in] session session from inference input is to be extracted + * @param[in] index index of input to be set (0-indexed) + * @param[in] layout layout to set to target input + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_input_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout); + +/** + * @brief Set the layout of an output + * + * The output that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout + * + * @param[in] session session from inference output is to be extracted + * @param[in] index index of output to be set (0-indexed) + * @param[in] layout layout to set to target output + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_output_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout); + +/** + * @brief Get i-th input tensor info + * + * <p>Before {@link nnfw_prepare} is invoked, this function return tensor info in model, + * so updated tensor info by {@link nnfw_apply_tensorinfo} is not returned.</p> + * + * <p>After {@link nnfw_prepare} is invoked, this function return updated tensor info + * if tensor info is updated by {@link nnfw_apply_tensorinfo}.</p> + * + * @param[in] session Session from input information is to be extracted + * @param[in] index Index of input + * @param[out] tensor_info Tensor info (shape, type, etc) + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_input_tensorinfo(nnfw_session *session, uint32_t index, + nnfw_tensorinfo 
*tensor_info); + +/** + * @brief Get i-th output tensor info + * + * <p>Before {@link nnfw_prepare} is invoked, this function return tensor info in model, + * so updated tensor info by {@link nnfw_apply_tensorinfo} is not returned.</p> + * + * <p>After {@link nnfw_prepare} is invoked, this function return updated tensor info + * if tensor info is updated by {@link nnfw_apply_tensorinfo}.</p> + * + * @param[in] session Session from output information is to be extracted + * @param[in] index Index of output + * @param[out] tensor_info Tensor info (shape, type, etc) + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_output_tensorinfo(nnfw_session *session, uint32_t index, + nnfw_tensorinfo *tensor_info); + +/** + * @brief Set available backends + * + * This function should be called before {@link nnfw_prepare} is invoked. + * + * <p>Supported backends differs on each platforms. + * For example, `x86_64` supports "cpu" only. + * Can set multiple backends by semicolon (ex: "acl_cl;cpu"). + * Among the multiple backends, the 1st element is used as default backend.</p> + * + * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon", "srcn" + * + * @param[in] session session to which available backends are set + * @param[in] backends available backends on which nnfw uses + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_available_backends(nnfw_session *session, const char *backends); + +/** + * @brief Set the operation's backend + * + * This function should be called before {@link nnfw_prepare} is invoked. + * + * <p>Supported backends differs on each platforms. + * For example, `x86_64` supports "cpu" only. 
+ * The backend for op has higher priority than default backend specified by + * {@link nnfw_set_available_backends}.</p> + * + * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon" + * + * @param[in] session session to be modified + * @param[in] op operation to be set + * @param[in] backend backend on which operation run + * + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const char *backend); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/runtime/neurun/api/include/nnfw_debug.h b/runtime/neurun/api/include/nnfw_debug.h new file mode 100644 index 000000000..eefca0d29 --- /dev/null +++ b/runtime/neurun/api/include/nnfw_debug.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_DEBUG_H__ +#define __NNFW_DEBUG_H__ + +#include "nnfw.h" + +NNFW_STATUS nnfw_create_debug_session(nnfw_session **session); + +#endif // __NNFW_DEBUG_H__ diff --git a/runtime/neurun/api/include/nnfw_dev.h b/runtime/neurun/api/include/nnfw_dev.h new file mode 100644 index 000000000..ecf0597cf --- /dev/null +++ b/runtime/neurun/api/include/nnfw_dev.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_DEV_H__ +#define __NNFW_DEV_H__ + +#include "nnfw.h" + +// Used for custom kernel development + +/* + * operand type, used only for custom operations + */ +typedef struct +{ + nnfw_tensorinfo type; + void *allocation; +} nnfw_operand; + +/* + * Used as input to custom operation eval function + */ +typedef struct +{ + size_t ninputs; + nnfw_operand *inputs; + + size_t noutputs; + nnfw_operand *outputs; +} nnfw_custom_kernel_params; + +/* + * Custom kernel evaluation function + * + * param[in] params custom operation parameters + * param[in] userdata pointer to user-specified buffer( kernel instance specific ) + */ +typedef void (*nnfw_custom_eval)(nnfw_custom_kernel_params *params, char *userdata, + size_t userdata_size); + +/* + * custom operation registration info + */ +typedef struct +{ + nnfw_custom_eval eval_function; +} custom_kernel_registration_info; + +NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id, + custom_kernel_registration_info *info); + +#endif // __NNFW_DEV_H__ diff --git a/runtime/neurun/api/src/CustomKernel.cc b/runtime/neurun/api/src/CustomKernel.cc new file mode 100644 index 000000000..60ddeedc2 --- /dev/null +++ b/runtime/neurun/api/src/CustomKernel.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "CustomKernel.h" + +namespace neurun +{ +namespace frontend +{ +namespace custom +{ + +using namespace backend::custom; + +class APIConverter +{ +public: + static nnfw_operand convertOperand(void *alloc, const TypeInfo &type) + { + nnfw_operand api_operand; + api_operand.allocation = alloc; + api_operand.type = convertType(type); + return api_operand; + } + + static nnfw_tensorinfo convertType(const TypeInfo &type) + { + nnfw_tensorinfo api_type; + api_type.rank = type.shape.rank(); + assert(type.shape.rank() <= 6); + std::copy(type.shape.dims().begin(), type.shape.dims().end(), std::begin(api_type.dims)); + + switch (type.dtype) + { + case ir::DataType::FLOAT32: + api_type.dtype = NNFW_TYPE_TENSOR_FLOAT32; + break; + case ir::DataType::INT32: + api_type.dtype = NNFW_TYPE_TENSOR_INT32; + break; + case ir::DataType::QUANT8_ASYMM: + api_type.dtype = NNFW_TYPE_TENSOR_QUANT8_ASYMM; + break; + case ir::DataType::BOOL8: + api_type.dtype = NNFW_TYPE_TENSOR_BOOL; + break; + default: + throw std::runtime_error("Unsupported tensor datatype"); + } + return api_type; + } +}; + +Kernel::Kernel(const nnfw_custom_eval evalFunction) + : _params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction) +{ +} + +void Kernel::configure(CustomKernelConfigParams &&inParams) +{ + _userdata = inParams.userdata; + _userdata_size = inParams.userdata_size; + + _params.ninputs = inParams.input_allocations.size(); + _params.inputs = new nnfw_operand[_params.ninputs]; + for (size_t i = 0; i < _params.ninputs; ++i) + { + _params.inputs[i] = + 
APIConverter::convertOperand(inParams.input_allocations[i], inParams.input_types[i]); + } + + _params.noutputs = inParams.output_allocations.size(); + _params.outputs = new nnfw_operand[_params.noutputs]; + for (size_t i = 0; i < _params.noutputs; ++i) + { + _params.outputs[i] = + APIConverter::convertOperand(inParams.output_allocations[i], inParams.output_types[i]); + } +} + +void Kernel::run() { _evalFunction(&_params, _userdata, _userdata_size); } + +} // namespace custom +} // namespace frontend +} // namespace neurun diff --git a/runtime/neurun/api/src/CustomKernel.h b/runtime/neurun/api/src/CustomKernel.h new file mode 100644 index 000000000..8cafc2061 --- /dev/null +++ b/runtime/neurun/api/src/CustomKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CUSTOM_KERNEL_H__ +#define __NEURUN_BACKEND_CUSTOM_KERNEL_H__ + +#include "nnfw_dev.h" + +#include "backend/CustomKernelBuilder.h" + +#include <vector> + +namespace neurun +{ +namespace frontend +{ +namespace custom +{ + +class Kernel : public ::neurun::exec::IFunction +{ +public: + explicit Kernel(nnfw_custom_eval evalFunction); + + nnfw_custom_kernel_params _params; + char *_userdata; + size_t _userdata_size; + + nnfw_custom_eval _evalFunction; + // nnfw_custom_type_infer _type_infer_function; //Unused for now + + /** + * Fills _params field used later by user specified eval function + * @param inParams custom kernel parameters + */ + virtual void configure(backend::custom::CustomKernelConfigParams &&inParams); + + void run() override; + void runSync() override { run(); } +}; + +} // namespace custom +} // namespace frontend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CUSTOM_KERNEL_H__ diff --git a/runtime/neurun/api/src/CustomKernelRegistry.cc b/runtime/neurun/api/src/CustomKernelRegistry.cc new file mode 100644 index 000000000..b223682b8 --- /dev/null +++ b/runtime/neurun/api/src/CustomKernelRegistry.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CustomKernelRegistry.h" + +#include "cpp14/memory.h" + +namespace neurun +{ +namespace frontend +{ +namespace custom +{ + +void KernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction) +{ + _storage.emplace(id, evalFunction); +} + +std::shared_ptr<backend::custom::IKernelBuilder> KernelRegistry::getBuilder() +{ + return nnfw::cpp14::make_unique<KernelBuilder>(this); +} + +std::unique_ptr<Kernel> KernelRegistry::buildKernelForOp(const std::string &id) +{ + auto it = _storage.find(id); + if (it == _storage.end()) + { + throw std::runtime_error("Unable to find associated kernel for op"); + } + + return nnfw::cpp14::make_unique<Kernel>(it->second); +} + +// Kernel builder +std::unique_ptr<exec::IFunction> +KernelBuilder::buildKernel(const std::string &id, + backend::custom::CustomKernelConfigParams &¶ms) const +{ + auto kernel = _registry->buildKernelForOp(id); + kernel->configure(std::move(params)); + + return kernel; +} + +KernelBuilder::KernelBuilder(KernelRegistry *registry) : _registry(registry) {} + +} // namespace custom +} // namespace frontend +} // namespace neurun diff --git a/runtime/neurun/api/src/CustomKernelRegistry.h b/runtime/neurun/api/src/CustomKernelRegistry.h new file mode 100644 index 000000000..207a82a0a --- /dev/null +++ b/runtime/neurun/api/src/CustomKernelRegistry.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CUSTOM_KERNEL_REGISTRY_H__ +#define __NEURUN_BACKEND_CUSTOM_KERNEL_REGISTRY_H__ + +#include "CustomKernel.h" + +#include <unordered_map> +#include <functional> +#include <memory> + +#include <iostream> + +namespace neurun +{ +namespace frontend +{ +namespace custom +{ + +class KernelRegistry +{ +public: + void registerKernel(const std::string &id, nnfw_custom_eval evalFunction); + + std::shared_ptr<backend::custom::IKernelBuilder> getBuilder(); + std::unique_ptr<Kernel> buildKernelForOp(const std::string &id); + +private: + std::unordered_map<std::string, nnfw_custom_eval> _storage; +}; + +class KernelBuilder : public backend::custom::IKernelBuilder +{ +public: + KernelBuilder(KernelRegistry *registry); + + std::unique_ptr<exec::IFunction> + buildKernel(const std::string &id, + backend::custom::CustomKernelConfigParams &¶ms) const override; + +private: + KernelRegistry *_registry; +}; + +} // namespace custom +} // namespace frontend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CUSTOM_KERNEL_REGISTRY_H__ diff --git a/runtime/neurun/api/src/OpMap.lst b/runtime/neurun/api/src/OpMap.lst new file mode 100644 index 000000000..5e93275b8 --- /dev/null +++ b/runtime/neurun/api/src/OpMap.lst @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MAP_MACRO +#error Define MAP_MACRO before including this file +#endif + +// circle operation | neurun internal operation +MAP_MACRO(ADD , Add) +MAP_MACRO(SUB , Sub) +MAP_MACRO(BATCH_TO_SPACE_ND , BatchToSpaceND) +MAP_MACRO(CAST , Cast) +MAP_MACRO(CONV_2D , Conv2D) +MAP_MACRO(DEPTHWISE_CONV_2D , DepthwiseConv2D) +MAP_MACRO(AVERAGE_POOL_2D , AvgPool2D) +MAP_MACRO(MAX_POOL_2D , MaxPool2D) +MAP_MACRO(CONCATENATION , Concat) +MAP_MACRO(FULLY_CONNECTED , FullyConnected) +MAP_MACRO(SUM , ReduceSum) +MAP_MACRO(RESHAPE , Reshape) +MAP_MACRO(MUL , Mul) +MAP_MACRO(SOFTMAX , Softmax) +MAP_MACRO(SQUEEZE , Squeeze) +MAP_MACRO(SLICE , Slice) +MAP_MACRO(STRIDED_SLICE , StridedSlice) +MAP_MACRO(TANH , Tanh) +MAP_MACRO(LOGISTIC , Logistic) +MAP_MACRO(DIV , Div) +MAP_MACRO(TRANSPOSE , Transpose) +MAP_MACRO(EXP , Exp) +MAP_MACRO(REDUCE_MAX , ReduceMax) +// UNMATCHED +//MAP_MACRO(Comparison) +MAP_MACRO(LOGICAL_AND , LogicalAnd) +MAP_MACRO(LOGICAL_OR , LogicalOr) +MAP_MACRO(LOGICAL_NOT , LogicalNot) +MAP_MACRO(LSTM , LSTM) +MAP_MACRO(RSQRT , RSQRT) +MAP_MACRO(RELU , ReLU) +MAP_MACRO(RESIZE_BILINEAR , ResizeBilinear) +MAP_MACRO(RELU_N1_TO_1 , ReLU1) +MAP_MACRO(RELU6 , ReLU6) +MAP_MACRO(RNN , RNN) +MAP_MACRO(FLOOR , Floor) +MAP_MACRO(SPACE_TO_BATCH_ND , SpaceToBatchND) +MAP_MACRO(SPACE_TO_DEPTH , SpaceToDepth) +MAP_MACRO(L2_POOL_2D , L2Pool2D) +MAP_MACRO(EMBEDDING_LOOKUP , EmbeddingLookup) +MAP_MACRO(L2_NORMALIZATION , L2Normalization) +MAP_MACRO(HASHTABLE_LOOKUP , HashtableLookup) +MAP_MACRO(INSTANCE_NORM , InstanceNorm) +MAP_MACRO(PRELU , PReLU) +MAP_MACRO(TRANSPOSE_CONV , TransposeConv) +MAP_MACRO(SQRT , SQRT) +MAP_MACRO(SQUARED_DIFFERENCE , SquaredDifference) +MAP_MACRO(TOPK_V2 , TopKV2) +MAP_MACRO(GATHER , Gather) +MAP_MACRO(NEG , Neg) +MAP_MACRO(ABS , Abs) +MAP_MACRO(ARG_MAX , ArgMax) +MAP_MACRO(DEQUANTIZE , Dequantize) +MAP_MACRO(MEAN , Mean) 
+MAP_MACRO(LOCAL_RESPONSE_NORMALIZATION , LocalResponseNormalization) +// UNDEFINED IN CIRCLE +//MAP_MACRO(DepthToSpace) +MAP_MACRO(PACK , Pack) +MAP_MACRO(REDUCE_MIN , ReduceMin) +MAP_MACRO(SPLIT , Split) +MAP_MACRO(UNPACK , Unpack) +MAP_MACRO(PAD , Pad) +MAP_MACRO(CUSTOM , Custom) +// UNDEFINED IN CIRCLE +//MAP_MACRO(Permute) +MAP_MACRO(MINIMUM , Min) +MAP_MACRO(MAXIMUM , Max) +MAP_MACRO(ONE_HOT , OneHot) diff --git a/runtime/neurun/api/src/nnfw_api.cc b/runtime/neurun/api/src/nnfw_api.cc new file mode 100644 index 000000000..bdac4c89b --- /dev/null +++ b/runtime/neurun/api/src/nnfw_api.cc @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nnfw_api_internal.h" + +/* + * Create a new session instance + * + * @param session the session to be created + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_create_session(nnfw_session **session) +{ + *session = new nnfw_session(); + + return NNFW_STATUS_NO_ERROR; +} + +/* + * Close a session instance + * + * @param session the session to be closed + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_close_session(nnfw_session *session) +{ + delete session; + return NNFW_STATUS_NO_ERROR; +} + +#define NNFW_RETURN_ERROR_IF_NULL(p) \ + do \ + { \ + if ((p) == NULL) \ + return NNFW_STATUS_ERROR; \ + } while (0) + +/* + * Load model from nnpackage file or directory + * + * @param session nnfw_session loading the given nnpackage file/dir + * @param package_file_path path to the nnpackage file or unzipped directory to be loaded + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *pacakge_file_path) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->load_model_from_file(pacakge_file_path); +} + +/* + * Prepare session to be ready for inference + * This phase may finalize model compilation, scheduling, and additional settings. 
+ * + * @param session the session to be prepared + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_prepare(nnfw_session *session) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->prepare(); +} + +/* + * Run inference + * + * @param session the session to run inference + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_run(nnfw_session *session) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->run(); +} + +/* + * Set input + * + * @param session session to the input is to be set + * @param index index of input to be set (0-indexed) + * @param type type of the input + * @param buffer raw buffer for input + * @param length size of bytes of output + * + * @return NNFW_STATUS_NO_ERROR if successful + */ + +NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type, + const void *buffer, size_t length) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->set_input(index, type, buffer, length); +} + +/* + * Set output + * + * @param session session from inference output is to be extracted + * @param index index of output to be set (0-indexed) + * @param type type of the output + * @param buffer raw buffer for output + * @param length size of bytes of output + * + * @return NNFW_STATUS_NO_ERROR if successful + */ + +NNFW_STATUS nnfw_set_output(nnfw_session *session, uint32_t index, NNFW_TYPE type, void *buffer, + size_t length) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->set_output(index, type, buffer, length); +} + +/* + * Get the number of inputs + * + * @param[in] session session from input information is to be extracted + * @param[out] number variable which the number of inputs is put into + * + * @return NNFW_STATUS_NO_ERROR if successful + */ + +NNFW_STATUS nnfw_input_size(nnfw_session *session, uint32_t *number) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->input_size(number); +} + +/* + * Get the number of outputs + * + * @param[in] session session 
from output information is to be extracted + * @param[out] number variable which the number of outputs is put into + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_output_size(nnfw_session *session, uint32_t *number) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->output_size(number); +} + +/* + * Set the layout of an input + * @note The input that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout + * + * @param[in] session session from inference input is to be extracted + * @param[in] index index of input to be set (0-indexed) + * @param[in] layout layout to set to target input + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_input_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->set_input_layout(index, layout); +} + +/* + * Set the layout of an output + * @note The output that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout + * + * @param[in] session session from inference output is to be extracted + * @param[in] index index of output to be set (0-indexed) + * @param[in] layout layout to set to target output + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_output_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->set_output_layout(index, layout); +} + +/* + * Get i-th input tensor info + * + * @param[in] session session from input information is to be extracted + * @param[in] index index of input + * @param[out] tensor_info nnfw_tensor_info + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_input_tensorinfo(nnfw_session *session, uint32_t index, + nnfw_tensorinfo *tensor_info) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->input_tensorinfo(index, tensor_info); +} + +/* + * Get i-th output tensor info + * + * @param[in] session session from output information is to be 
extracted + * @param[in] index index of output + * @param[out] tensor_info nnfw_tensor_info + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_output_tensorinfo(nnfw_session *session, uint32_t index, + nnfw_tensorinfo *tensor_info) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->output_tensorinfo(index, tensor_info); +} + +/* + * Register custom operation + * @param session session to register this operation + * @param id operation id + * @param info registration info ( eval function, etc. ) + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id, + custom_kernel_registration_info *info) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->register_custom_operation(id, info->eval_function); +} + +NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index, + nnfw_tensorinfo tensor_info) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->apply_tensorinfo(index, tensor_info); +} + +/* + * Set available backends + * + * @param[in] session session to which a avilable backends are set + * @param[in] backends available backends on which nnfw uses + */ +NNFW_STATUS nnfw_set_available_backends(nnfw_session *session, const char *backends) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->set_available_backends(backends); +} + +/* + * Set the operation's backend + * + * @param[in] session session to be modified + * @param[in] op operation to be set + * @param[in] backend bakcend on which operation run + * + * @return NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const char *backend) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->set_op_backend(op, backend); +} diff --git a/runtime/neurun/api/src/nnfw_api_internal.cc b/runtime/neurun/api/src/nnfw_api_internal.cc new file mode 100644 index 000000000..037cd3bca --- /dev/null +++ 
b/runtime/neurun/api/src/nnfw_api_internal.cc
@@ -0,0 +1,435 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnfw_api_internal.h"
#include "CustomKernelRegistry.h"
#include "compiler/Compiler.h"
#include "exec/Execution.h"
#include "circle_loader.h"
#include "tflite_loader.h"
#include "json/json.h"
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <dirent.h>
#include <util/ConfigSource.h>

/*
 * API does not accept string argument longer than max length below
 */
#define MAX_BACKEND_NAME_LENGTH 32
#define MAX_OP_NAME_LENGTH 64

// Is null-terminating in length ?
+static bool null_terminating(const char *str, uint32_t length) +{ + for (uint32_t i = 0; i < length; i++) + { + if (*(str + i) == '\0') + { + return true; + } + } + return false; +} + +static neurun::ir::Layout convertLayout(NNFW_LAYOUT layout) +{ + if (layout == NNFW_LAYOUT_CHANNELS_LAST) + { + return neurun::ir::Layout::NHWC; + } + else if (layout == NNFW_LAYOUT_CHANNELS_FIRST) + { + return neurun::ir::Layout::NCHW; + } + return neurun::ir::Layout::UNKNOWN; +} + +nnfw_session::nnfw_session() + : _graph{nullptr}, _execution{nullptr}, + _kernel_registry{std::make_shared<neurun::frontend::custom::KernelRegistry>()}, + _source{nnfw::cpp14::make_unique<neurun::util::GeneralConfigSource>()} +{ + // DO NOTHING +} + +NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir) +{ + // TODO : add support for zipped package file load + DIR *dir; + if (!(dir = opendir(package_dir))) + { + std::cerr << "invalid nnpackge directory: " << package_dir << std::endl; + return NNFW_STATUS_ERROR; + } + closedir(dir); + + try + { + std::string manifest_file_name(package_dir); + manifest_file_name += "/metadata/MANIFEST"; + std::ifstream mfs(manifest_file_name); + + // extract the filename of the first(index 0) model + // e.g. 
In MANIFEST file, { "models" : [ "firstmodel.tflite", "2nd.tflite" ] } + Json::Value root; + mfs >> root; + Json::Value models = root["models"]; + Json::Value model_types = root["model-types"]; + + auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model + auto model_type = model_types[0].asString(); // first model's type + if (model_type == "tflite") + { + _graph = neurun::tflite_loader::loadModel(model_file_path.c_str()); + } + else if (model_type == "circle") + { + _graph = neurun::circle_loader::loadModel(model_file_path.c_str()); + } + else + { + std::cerr << "Unsupported model type in MANIFEST" << std::endl; + return NNFW_STATUS_ERROR; + } + _graph->bindKernelBuilder(_kernel_registry->getBuilder()); + } + catch (const std::exception &e) + { + std::cerr << "Error during model loading : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::prepare() +{ + // TODO : add additional setting routine(executor type, backend) + // Note that we assume acl_cl backend + + try + { + // config_source setting + using neurun::util::config_source; + config_source(std::move(_source)); + + auto compiler = nnfw::cpp14::make_unique<neurun::compiler::Compiler>(_graph); + compiler->compile(); + std::shared_ptr<neurun::exec::IExecutor> executor; + compiler->release(executor); + _execution = std::make_shared<neurun::exec::Execution>(executor); + } + catch (const std::exception &e) + { + std::cerr << "Error during model prepare : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::run() +{ + try + { + _execution->execute(); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const void *buffer, + size_t 
length) +{ + try + { + _execution->setInput(neurun::ir::IOIndex(index), buffer, length); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::set_input : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *buffer, + size_t length) +{ + try + { + _execution->setOutput(neurun::ir::IOIndex(index), buffer, length); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::set_output : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::input_size(uint32_t *number) +{ + try + { + if (number == nullptr) + { + std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl; + return NNFW_STATUS_ERROR; + } + *number = _graph->getInputs().size(); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::input_size : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::output_size(uint32_t *number) +{ + try + { + if (number == nullptr) + { + std::cerr << "Error during nnfw_session::output_size, number is null pointer." 
<< std::endl; + return NNFW_STATUS_ERROR; + } + *number = _graph->getOutputs().size(); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::output_size" << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout) +{ + try + { + if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST && + layout != NNFW_LAYOUT_CHANNELS_LAST) + { + std::cerr << "Error during nnfw_session::set_input_layout, not supported layout" << std::endl; + return NNFW_STATUS_ERROR; + } + _execution->setInputLayout(neurun::ir::IOIndex(index), convertLayout(layout)); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::set_input_layout : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout) +{ + try + { + if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST && + layout != NNFW_LAYOUT_CHANNELS_LAST) + { + std::cerr << "Error during nnfw_session::set_output_layout, not supported layout" + << std::endl; + return NNFW_STATUS_ERROR; + } + _execution->setOutputLayout(neurun::ir::IOIndex(index), convertLayout(layout)); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::set_output_layout : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +static NNFW_TYPE datatype_to_nnfw_dtype(neurun::ir::DataType dt) +{ + using neurun::ir::DataType; + switch (dt) + { + case DataType::FLOAT32: + return NNFW_TYPE_TENSOR_FLOAT32; + case DataType::INT32: + return NNFW_TYPE_TENSOR_INT32; + case DataType::QUANT8_ASYMM: + return NNFW_TYPE_TENSOR_QUANT8_ASYMM; + case DataType::BOOL8: + return NNFW_TYPE_TENSOR_BOOL; + case DataType::UINT8: + return NNFW_TYPE_TENSOR_UINT8; + case DataType::UINT32: + case 
DataType::QUANT8_SYMM: + default: + std::cerr << "Error: Model has type that runtime API does not support." << std::endl; + exit(-1); + } +} + +NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t /*index*/, nnfw_tensorinfo /*ti*/) +{ + std::cerr << "Error: NYI" << std::endl; + return NNFW_STATUS_ERROR; +} + +NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti) +{ + try + { + if (ti == nullptr) + { + std::cerr << "Error during nnfw_session::input_tensorinfo, tensorinfo is null pointer." + << std::endl; + return NNFW_STATUS_ERROR; + } + if (index >= _graph->getInputs().size()) + { + std::cerr << "Error during nnfw_session::input_tensorinfo, index is out of range." + << std::endl; + return NNFW_STATUS_ERROR; + } + auto opidx = _graph->getInputs().at(index); + auto shape = _graph->operands().at(opidx).shape(); + ti->rank = shape.rank(); + for (int j = 0; j < ti->rank; ++j) + { + ti->dims[j] = shape.dim(j); + } + ti->dtype = datatype_to_nnfw_dtype(_graph->operands().at(opidx).typeInfo().type()); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::input_tensorinfo : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti) +{ + try + { + if (ti == nullptr) + { + std::cerr << "Error during nnfw_session::output_tensorinfo, tensorinfo is null pointer." + << std::endl; + return NNFW_STATUS_ERROR; + } + if (index >= _graph->getOutputs().size()) + { + std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range." 
+ << std::endl; + return NNFW_STATUS_ERROR; + } + auto opidx = _graph->getOutputs().at(index); + auto shape = _graph->operands().at(opidx).shape(); + ti->rank = shape.rank(); + for (int j = 0; j < ti->rank; ++j) + { + ti->dims[j] = shape.dim(j); + } + ti->dtype = datatype_to_nnfw_dtype(_graph->operands().at(opidx).typeInfo().type()); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::output_tensorinfo : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} +NNFW_STATUS nnfw_session::register_custom_operation(const std::string &id, + nnfw_custom_eval eval_func) +{ + _kernel_registry->registerKernel(id, eval_func); + return NNFW_STATUS_NO_ERROR; +} + +static std::string get_op_backend_string(std::string op) +{ +#define MAP_MACRO(CircleName, NeurunName) {#CircleName, "OP_BACKEND_" #NeurunName}, + + static std::unordered_map<std::string, std::string> operation_map = { +#include "OpMap.lst" + }; + +#undef MAP_MACRO + + auto n = operation_map.find(op); + + if (n == operation_map.end()) + { + // this return value is handled by a caller to return error code + return std::string(""); + } + else + { + return n->second; + } +} + +NNFW_STATUS nnfw_session::set_available_backends(const char *backends) +{ + try + { + if (!backends || null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false) + { + return NNFW_STATUS_ERROR; + } + + _source->set("BACKENDS", backends); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::set_available_backends : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend) +{ + try + { + if (!op || !null_terminating(op, MAX_OP_NAME_LENGTH) || !backend || + !null_terminating(backend, MAX_BACKEND_NAME_LENGTH)) + { + return NNFW_STATUS_ERROR; + } + + auto key = get_op_backend_string(op); + + if (key.empty()) + { + return 
NNFW_STATUS_ERROR; + } + + _source->set(key, backend); + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::set_op_backend : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + return NNFW_STATUS_NO_ERROR; +} diff --git a/runtime/neurun/api/src/nnfw_api_internal.h b/runtime/neurun/api/src/nnfw_api_internal.h new file mode 100644 index 000000000..40069cc55 --- /dev/null +++ b/runtime/neurun/api/src/nnfw_api_internal.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __API_NNFW_API_INTERNAL_H__ +#define __API_NNFW_API_INTERNAL_H__ + +#include "nnfw.h" +#include "nnfw_dev.h" + +#include <util/GeneralConfigSource.h> + +#include <string> +#include <memory> + +namespace neurun +{ +namespace frontend +{ +namespace custom +{ +class KernelRegistry; +} +} // namespace frontend +namespace exec +{ +class Execution; +} +namespace ir +{ +class Graph; +} // namespace ir +} // namespace neurun + +struct nnfw_session +{ +public: + nnfw_session(); + + NNFW_STATUS load_model_from_file(const char *package_file_path); + NNFW_STATUS prepare(); + NNFW_STATUS run(); + + NNFW_STATUS set_input(uint32_t index, NNFW_TYPE type, const void *buffer, size_t length); + NNFW_STATUS set_output(uint32_t index, NNFW_TYPE type, void *buffer, size_t length); + + NNFW_STATUS input_size(uint32_t *number); + NNFW_STATUS output_size(uint32_t *number); + + NNFW_STATUS set_input_layout(uint32_t index, NNFW_LAYOUT layout); + NNFW_STATUS set_output_layout(uint32_t index, NNFW_LAYOUT layout); + + NNFW_STATUS apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti); + + NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti); + NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti); + + NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func); + + NNFW_STATUS set_available_backends(const char *backends); + NNFW_STATUS set_op_backend(const char *op, const char *backend); + +private: + std::shared_ptr<neurun::ir::Graph> _graph; + std::shared_ptr<neurun::exec::Execution> _execution; + std::shared_ptr<neurun::frontend::custom::KernelRegistry> _kernel_registry; + +protected: + std::unique_ptr<neurun::util::GeneralConfigSource> _source; +}; + +#endif // __API_NNFW_API_INTERNAL_H__ diff --git a/runtime/neurun/api/src/nnfw_debug.cc b/runtime/neurun/api/src/nnfw_debug.cc new file mode 100644 index 000000000..4ea0a203f --- /dev/null +++ b/runtime/neurun/api/src/nnfw_debug.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2019 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnfw_debug_internal.h" + +NNFW_STATUS nnfw_create_debug_session(nnfw_session **session) +{ + *session = new nnfw_debug_session(); + + return NNFW_STATUS_NO_ERROR; +} diff --git a/runtime/neurun/api/src/nnfw_debug_internal.cc b/runtime/neurun/api/src/nnfw_debug_internal.cc new file mode 100644 index 000000000..778efbc5c --- /dev/null +++ b/runtime/neurun/api/src/nnfw_debug_internal.cc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nnfw_debug_internal.h" +#include "util/EnvConfigSource.h" + +#include <cpp14/memory.h> + +nnfw_debug_session::nnfw_debug_session() : nnfw_session() +{ + _source = nnfw::cpp14::make_unique<neurun::util::EnvConfigSource>(); +} diff --git a/runtime/neurun/api/src/nnfw_debug_internal.h b/runtime/neurun/api/src/nnfw_debug_internal.h new file mode 100644 index 000000000..f4984e7a1 --- /dev/null +++ b/runtime/neurun/api/src/nnfw_debug_internal.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __API_NNFW_DEBUG_INTERNAL_H__ +#define __API_NNFW_DEBUG_INTERNAL_H__ + +#include "nnfw_debug.h" +#include "nnfw_api_internal.h" + +class nnfw_debug_session : public nnfw_session +{ +public: + nnfw_debug_session(); +}; + +#endif // __API_NNFW_DEBUG_INTERNAL_H__ diff --git a/runtime/neurun/backend/CMakeLists.txt b/runtime/neurun/backend/CMakeLists.txt new file mode 100644 index 000000000..fc363e031 --- /dev/null +++ b/runtime/neurun/backend/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LIB_NEURUN_BACKEND_ACL_COMMON neurun_backend_acl_common) +set(LIB_NEURUN_BACKEND_CPU_COMMON neurun_backend_cpu_common) + +add_subdirectory(cpu) +add_subdirectory(cpu_common) +add_subdirectory(acl_cl) +add_subdirectory(acl_neon) +add_subdirectory(acl_common) +add_subdirectory(hi_perf_cpu) +add_subdirectory(srcn) diff --git a/runtime/neurun/backend/acl_cl/Backend.h b/runtime/neurun/backend/acl_cl/Backend.h new file mode 100644 index 000000000..2033b42e7 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/Backend.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_CL_BACKEND_H__ +#define __NEURUN_BACKEND_ACL_CL_BACKEND_H__ + +#include <memory> +#include <backend/Backend.h> +#include <ir/Operands.h> + +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ShapeFixer.h" +#include "TensorManager.h" +#include "TensorRegister.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +class Backend : public ::neurun::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<BackendContext> + newContext(const ir::Operands &operands, + const std::shared_ptr<custom::IKernelBuilder> &) const override + { + auto tensor_builder = std::make_shared<TensorBuilder>(createTensorManager()); + return std::unique_ptr<BackendContext>{new BackendContext{ + this, tensor_builder, std::make_shared<ConstantInitializer>(operands, tensor_builder), + std::make_shared<KernelGenerator>(operands, tensor_builder), + std::make_shared<ShapeFixer>(operands, tensor_builder), + std::make_shared<TensorRegister>(operands, tensor_builder)}}; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_BACKEND_H__ diff --git a/runtime/neurun/backend/acl_cl/CLTimer.h b/runtime/neurun/backend/acl_cl/CLTimer.h new file mode 100644 index 000000000..3939ee722 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/CLTimer.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_CL_CLTIMER_H__ +#define __NEURUN_BACKEND_ACL_CL_CLTIMER_H__ + +#include <util/ITimer.h> +#include <arm_compute/core/CL/OpenCL.h> +#include <arm_compute/runtime/CL/CLScheduler.h> +#include <chrono> +#include <list> +#include <sstream> + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +/** + * @brief Class to measure CL kernels execution time + */ +class CLTimer : public util::ITimer +{ +public: + /** + * @brief This function replaces CL function, which enqueues a command to execute a kernel + * with a wrapper which remembers enqueued kernels + */ + void handleBegin() override + { + _measured_events.clear(); + + _origin_enqueue_function = arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr; + + auto _timer_enqueue_function = [this](cl_command_queue command_queue, cl_kernel kernel, + cl_uint work_dim, const size_t *gwo, const size_t *gws, + const size_t *lws, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *usr_event) { + cl_event event; + cl_int enqueue_res = + this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws, + num_events_in_wait_list, event_wait_list, &event); + this->_measured_events.emplace_back(event); + + // According to spec, if NULL was provided in usr_event - event shouldn't be returned + if (usr_event != nullptr) + { + clRetainEvent(event); + *usr_event = event; + } + return enqueue_res; + }; + arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _timer_enqueue_function; + + // Set CL_QUEUE_PROFILING_ENABLE flag for 
the CL command-queue, if it isn't already set + auto &cl_scheduler = arm_compute::CLScheduler::get(); + auto props = cl_scheduler.queue().getInfo<CL_QUEUE_PROPERTIES>(); + if ((props & CL_QUEUE_PROFILING_ENABLE) == 0) + { + cl_scheduler.set_queue( + cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE)); + } + }; + + /** + * @brief Get timer result by addition executed CL kernels durations + */ + void handleEnd() override + { + _timer_res = 0; + for (auto const &event : _measured_events) + { + cl_ulong start; + cl_ulong end; + event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end); + _timer_res += (end - start) / 1000.f; // nanoseconds -> microseconds + } + + // Restore origin CL enqueue function + arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _origin_enqueue_function; + }; + +private: + std::function<decltype(clEnqueueNDRangeKernel)> _origin_enqueue_function; + std::list<::cl::Event> _measured_events; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_CLTIMER_H__ diff --git a/runtime/neurun/backend/acl_cl/CMakeLists.txt b/runtime/neurun/backend/acl_cl/CMakeLists.txt new file mode 100644 index 000000000..aaf6a4d62 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/CMakeLists.txt @@ -0,0 +1,21 @@ +# Unsupported architecture +nnas_find_package(ARMCompute QUIET) +if(NOT ARMCompute_FOUND) + return() +endif(NOT ARMCompute_FOUND) + +set(LIB_NEURUN_BACKEND_ACL_CL neurun_backend_acl_cl) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_NEURUN_BACKEND_ACL_CL} SHARED ${SOURCES}) + +target_include_directories(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE neurun_core) +target_link_libraries(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE ${LIB_NEURUN_BACKEND_ACL_COMMON}) +target_link_libraries(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE nnfw_common) 
+target_link_libraries(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE nnfw_coverage) + +set_target_properties(${LIB_NEURUN_BACKEND_ACL_CL} PROPERTIES OUTPUT_NAME backend_acl_cl) + +install(TARGETS ${LIB_NEURUN_BACKEND_ACL_CL} DESTINATION lib) diff --git a/runtime/neurun/backend/acl_cl/Config.cc b/runtime/neurun/backend/acl_cl/Config.cc new file mode 100644 index 000000000..36bf83686 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/Config.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// For CLKernelLibraryEx initialization +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLKernelLibraryEx.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +#include "Config.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +bool Config::initialize() +{ + if (!arm_compute::opencl_is_available()) + { + return false; + } + arm_compute::CLScheduler::get().default_init(); + // NOTE CLKernelLibraryEx must use the same context as CLScheduler + // It did not check whether another device is available. 
+ arm_compute::CLKernelLibraryEx::get().init( + "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault()); + + return true; +} + +} // namespace acl_cl +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_cl/Config.h b/runtime/neurun/backend/acl_cl/Config.h new file mode 100644 index 000000000..a7ceaac26 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/Config.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_CL_CONFIG_H__ +#define __NEURUN_BACKEND_ACL_CL_CONFIG_H__ + +#include "CLTimer.h" +#include <cpp14/memory.h> +#include <backend/IConfig.h> + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "acl_cl"; } + bool initialize() override; + bool SupportPermutation() override { return true; } + bool SupportSubTensorAlloc() override { return true; } + std::unique_ptr<util::ITimer> timer() override { return nnfw::cpp14::make_unique<CLTimer>(); } +}; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_CONFIG_H__ diff --git a/runtime/neurun/backend/acl_cl/ConstantInitializer.cc b/runtime/neurun/backend/acl_cl/ConstantInitializer.cc new file mode 100644 index 000000000..165b17cd1 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/ConstantInitializer.cc @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConstantInitializer.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +ConstantInitializer::ConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _operands{operands}, _tensor_builder{tensor_builder} +{ + // DO NOTHING +} + +void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); + const auto &block_size_obj = _operands.at(block_size_index); + + if (block_size_obj.isConstant()) + { + _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) { + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](::neurun::backend::operand::ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const int32_t value = base[shape.num_elements() - i - 1]; + int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + + tensor.calcOffset({static_cast<int32_t>(i)})); + *into = value; + } + }); + }; + } +} + +void ConstantInitializer::visit(const ir::operation::Conv2D &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + registerPermuteInitializer(kernel_index, kernel_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + registerPermuteInitializer(kernel_index, kernel_obj); + + const auto &bias_index = 
node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto &lookups_index = node.getInputs().at(ir::operation::EmbeddingLookup::LOOKUPS); + const auto &lookups_obj = _operands.at(lookups_index); + registerCopyInitializer(lookups_index, lookups_obj); +} + +void ConstantInitializer::visit(const ir::operation::FullyConnected &node) +{ + const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto &weight_obj = _operands.at(weight_index); + registerCopyInitializer(weight_index, weight_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::Gather &node) +{ + const auto &indices_index = node.getInputs().at(ir::operation::Gather::INDICES); + const auto &indices_obj = _operands.at(indices_index); + registerCopyInitializer(indices_index, indices_obj); +} + +void ConstantInitializer::visit(const ir::operation::HashtableLookup &node) +{ + const auto &lookups_index = node.getInputs().at(ir::operation::HashtableLookup::LOOKUPS); + const auto &lookups_obj = _operands.at(lookups_index); + registerCopyInitializer(lookups_index, lookups_obj); + + const auto &keys_index = node.getInputs().at(ir::operation::HashtableLookup::KEYS); + const auto &keys_obj = _operands.at(keys_index); + registerCopyInitializer(keys_index, keys_obj); +} + +void ConstantInitializer::visit(const ir::operation::LSTM &node) +{ + const auto &input_to_input_weights_index = + node.getInputs().at(ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); + const auto &input_to_input_weights_obj = _operands.at(input_to_input_weights_index); + registerCopyInitializer(input_to_input_weights_index, 
input_to_input_weights_obj); + + const auto &input_to_forget_weights_index = + node.getInputs().at(ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); + const auto &input_to_forget_weights_obj = _operands.at(input_to_forget_weights_index); + registerCopyInitializer(input_to_forget_weights_index, input_to_forget_weights_obj); + + const auto &input_to_cell_weights_index = + node.getInputs().at(ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); + const auto &input_to_cell_weights_obj = _operands.at(input_to_cell_weights_index); + registerCopyInitializer(input_to_cell_weights_index, input_to_cell_weights_obj); + + const auto &input_to_output_weights_index = + node.getInputs().at(ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); + const auto &input_to_output_weights_obj = _operands.at(input_to_output_weights_index); + registerCopyInitializer(input_to_output_weights_index, input_to_output_weights_obj); + + const auto &recurrent_to_input_weights_index = + node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); + const auto &recurrent_to_input_weights_obj = _operands.at(recurrent_to_input_weights_index); + registerCopyInitializer(recurrent_to_input_weights_index, recurrent_to_input_weights_obj); + + const auto &recurrent_to_forget_weights_index = + node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); + const auto &recurrent_to_forget_weights_obj = _operands.at(recurrent_to_forget_weights_index); + registerCopyInitializer(recurrent_to_forget_weights_index, recurrent_to_forget_weights_obj); + + const auto &recurrent_to_cell_weights_index = + node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); + const auto &recurrent_to_cell_weights_obj = _operands.at(recurrent_to_cell_weights_index); + registerCopyInitializer(recurrent_to_cell_weights_index, recurrent_to_cell_weights_obj); + + const auto &recurrent_to_output_weights_index = + node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); + const auto &recurrent_to_output_weights_obj 
= _operands.at(recurrent_to_output_weights_index); + registerCopyInitializer(recurrent_to_output_weights_index, recurrent_to_output_weights_obj); + + const auto &cell_to_input_weights_index = + node.getInputs().at(ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); + const auto &cell_to_input_weights_obj = _operands.at(cell_to_input_weights_index); + registerCopyInitializer(cell_to_input_weights_index, cell_to_input_weights_obj); + + const auto &cell_to_forget_weights_index = + node.getInputs().at(ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); + const auto &cell_to_forget_weights_obj = _operands.at(cell_to_forget_weights_index); + registerCopyInitializer(cell_to_forget_weights_index, cell_to_forget_weights_obj); + + const auto &cell_to_output_weights_index = + node.getInputs().at(ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); + const auto &cell_to_output_weights_obj = _operands.at(cell_to_output_weights_index); + registerCopyInitializer(cell_to_output_weights_index, cell_to_output_weights_obj); + + const auto &input_gate_bias_index = node.getInputs().at(ir::operation::LSTM::INPUT_GATE_BIAS); + const auto &input_gate_bias_obj = _operands.at(input_gate_bias_index); + registerCopyInitializer(input_gate_bias_index, input_gate_bias_obj); + + const auto &forget_gate_bias_index = node.getInputs().at(ir::operation::LSTM::FORGET_GATE_BIAS); + const auto &forget_gate_bias_obj = _operands.at(forget_gate_bias_index); + registerCopyInitializer(forget_gate_bias_index, forget_gate_bias_obj); + + const auto &output_gate_bias_index = node.getInputs().at(ir::operation::LSTM::OUTPUT_GATE_BIAS); + const auto &output_gate_bias_obj = _operands.at(output_gate_bias_index); + registerCopyInitializer(output_gate_bias_index, output_gate_bias_obj); + + const auto &projection_weights_index = + node.getInputs().at(ir::operation::LSTM::PROJECTION_WEIGHTS); + const auto &projection_weights_obj = _operands.at(projection_weights_index); + registerCopyInitializer(projection_weights_index, 
projection_weights_obj); + + const auto &projection_bias_index = node.getInputs().at(ir::operation::LSTM::PROJECTION_BIAS); + const auto &projection_bias_obj = _operands.at(projection_bias_index); + registerCopyInitializer(projection_bias_index, projection_bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::RNN &node) +{ + const auto &weights_index = node.getInputs().at(ir::operation::RNN::WEIGHTS); + const auto &weights_obj = _operands.at(weights_index); + registerCopyInitializer(weights_index, weights_obj); + + const auto &recurrent_weights_index = node.getInputs().at(ir::operation::RNN::RECURRENT_WEIGHTS); + const auto &recurrent_weights_obj = _operands.at(recurrent_weights_index); + registerCopyInitializer(recurrent_weights_index, recurrent_weights_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::RNN::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) +{ + const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE); + const auto &block_size_obj = _operands.at(block_size_index); + + if (block_size_obj.isConstant()) + { + _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) { + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](::neurun::backend::operand::ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const int32_t value = base[shape.num_elements() - i - 1]; + int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + + tensor.calcOffset({static_cast<int32_t>(i)})); + *into = value; + } + }); + }; + } + + const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS); + const auto &paddings_obj = _operands.at(paddings_index); + if 
(paddings_obj.isConstant()) + { + _init_map[paddings_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) { + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base()); + assert(model_obj.shape().rank() == 2); + assert(obj.dimension(0) == 2); + obj.access([&](::neurun::backend::operand::ITensor &tensor) { + for (auto i = 0; i < shape.dim(0); ++i) + { + for (auto j = 0; j < shape.dim(1); ++j) + { + const int32_t value = base[i * 2 + j]; + int32_t *into = reinterpret_cast<int32_t *>( + tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j})); + *into = value; + } + } + }); + }; + } +} + +void ConstantInitializer::visit(const ir::operation::TransposeConv &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + registerPermuteInitializer(kernel_index, kernel_obj); +} + +} // namespace acl_cl +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_cl/ConstantInitializer.h b/runtime/neurun/backend/acl_cl/ConstantInitializer.h new file mode 100644 index 000000000..5965d2046 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/ConstantInitializer.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ +#define __NEURUN_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ + +#include <backend/IConstantInitializer.h> +#include <ir/Operands.h> +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +class ConstantInitializer : public IConstantInitializer +{ +public: + ConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder); + +public: + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::HashtableLookup &) override; + void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::SpaceToBatchND &) override; + void visit(const ir::operation::TransposeConv &) override; + +private: + const ir::Operands &operands() const override { return _operands; } + std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } + +private: + const ir::Operands &_operands; + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ diff --git a/runtime/neurun/backend/acl_cl/KernelGenerator.cc b/runtime/neurun/backend/acl_cl/KernelGenerator.cc new file mode 100644 index 000000000..bffb60b61 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/KernelGenerator.cc @@ -0,0 +1,2151 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KernelGenerator.h" + +#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions +#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions + +#include <AclFunction.h> +#include <Convert.h> +#include <Swizzle.h> + +#include "ir/Index.h" +#include "ir/DataType.h" +#include "ir/InternalType.h" +#include "compiler/IExecutionBuilder.h" +#include "exec/NopFunction.h" +#include "util/logging.h" +#include "util/Utils.h" +#include "util/Padding.h" + +using ::neurun::compiler::IExecutionBuilder; + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +using ::neurun::backend::acl_common::asAclFunction; + +// +// ActivationBuilder +// +class ActivationBuilder +{ +public: + explicit ActivationBuilder(IExecutionBuilder &builder) : _builder(builder) + { + // DO NOTHING + } + +private: + void appendReLU(::arm_compute::ICLTensor *ifm_alloc); + void appendReLU1(::arm_compute::ICLTensor *ifm_alloc); + void appendReLU6(::arm_compute::ICLTensor *ifm_alloc); + +public: + void append(ir::Activation code, ::arm_compute::ICLTensor *ifm_alloc); + +private: + IExecutionBuilder &_builder; +}; + +void ActivationBuilder::appendReLU(::arm_compute::ICLTensor *ifm_alloc) +{ + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + + auto fn = 
nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc, nullptr, act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _builder.append(std::move(acl_fn)); +} + +void ActivationBuilder::appendReLU1(::arm_compute::ICLTensor *ifm_alloc) +{ + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc, nullptr, act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _builder.append(std::move(acl_fn)); +} + +void ActivationBuilder::appendReLU6(::arm_compute::ICLTensor *ifm_alloc) +{ + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc, nullptr, act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _builder.append(std::move(acl_fn)); +} + +void ActivationBuilder::append(ir::Activation code, ::arm_compute::ICLTensor *ifm_alloc) +{ + switch (code) + { + case ir::Activation::NONE: + { + // DO NOTHING + break; + } + case ir::Activation::RELU: + { + appendReLU(ifm_alloc); + break; + } + case ir::Activation::RELU1: + { + appendReLU1(ifm_alloc); + break; + } + case ir::Activation::RELU6: + { + appendReLU6(ifm_alloc); + break; + } + default: + { + throw std::runtime_error("Not supported, yet"); + } + } +} + +// +// KernelGenerator +// +KernelGenerator::KernelGenerator(const ir::Operands &ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _ctx(ctx), _tensor_builder(tensor_builder), _current_subg_layout(ir::Layout::UNKNOWN) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + _current_subg_layout = op_seq.getLayout(); + for (const auto &e : op_seq.operations()) + { + const auto &node = *(e.node); + 
_tensor_builder->preVisit(node); + node.accept(*this); + _tensor_builder->postVisit(node); + } +} + +void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; + const auto block_size_index{ + node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto block_size_alloc = _tensor_builder->at(block_size_index).get(); + + assert(_ctx.at(block_size_index).isConstant()); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBatchToSpaceLayer>(); + + fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Cast &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + const auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8 + ? 
arm_compute::SubDataType::BOOL + : arm_compute::SubDataType::NONE; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto stride = node.param().stride; + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ker_alloc = _tensor_builder->at(ker_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + + const auto conv_info = acl_common::asPadStrideInfo(padding, stride); + const auto act_info = acl_common::asActivationLayerInfo(activation); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLConvolutionLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(), + conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); + + 
_execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) +{ + using ir::operation::DepthwiseConv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto stride = node.param().stride; + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const auto multiplier = node.param().multiplier; + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ker_alloc = _tensor_builder->at(ker_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + + const auto conv_info = acl_common::asPadStrideInfo(padding, stride); + const auto act_info = acl_common::asActivationLayerInfo(activation); + + if (ker_height == 3 && ker_width == 3) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer3x3>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), + ofm_alloc->handle(), conv_info, multiplier, act_info); + + _execution_builder->append(asAclFunction(std::move(fn))); + } + else + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); + + 
fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), + ofm_alloc->handle(), conv_info, multiplier, act_info); + + _execution_builder->append(asAclFunction(std::move(fn))); + } +} + +void KernelGenerator::visit(const ir::operation::MaxPool2D &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl; + VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl; + VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl; + VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl; + VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl; + VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl; + VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl; + VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; + VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl; + VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl; + VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl; + VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX, + ::arm_compute::Size2D{kw, kh}, + acl_common::asPadStrideInfo(padding, stride)}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); + + 
fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append((std::move(acl_fn))); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::AvgPool2D &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl; + VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl; + VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl; + VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl; + VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl; + VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl; + VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl; + VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; + VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl; + VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl; + VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl; + VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{ + ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh}, + acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; + + auto fn = 
nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append((std::move(acl_fn))); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Concat &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + std::vector<ir::OperandIndex> input_indexes; + + for (const auto &input : node.getInputs()) + input_indexes.emplace_back(input); + + const auto axis = node.param().axis; + + // If tensor allocator allocate as subtensor + bool canEliminate = true; + for (auto &ifm_ind : input_indexes) + { + if (!_tensor_builder->isSubTensorOf(ofm_index, ifm_ind)) + { + canEliminate = false; + break; + } + } + if (canEliminate) + { + // If concat eliminated, return a NOP IFunction + _execution_builder->append(nnfw::cpp14::make_unique<exec::NopFunction>()); + return; + } + + auto output_alloc = _tensor_builder->at(ofm_index).get(); + std::vector<::arm_compute::ICLTensor *> input_tensors; + for (auto &ifm_ind : input_indexes) + input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); + + std::unique_ptr<::arm_compute::IFunction> fn; + if (input_indexes.size() < 2) + { + auto l = nnfw::cpp14::make_unique<::arm_compute::CLCopy>(); + l->configure(input_tensors.at(0), output_alloc->handle()); + fn = std::move(l); + } + else + { + auto l = nnfw::cpp14::make_unique<::arm_compute::CLConcatenateLayer>(); + const auto rank = node.param().rank; + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = output_alloc->layout(); + const auto fixed_axis = + acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); + l->configure(input_tensors, output_alloc->handle(), fixed_axis); + fn = std::move(l); + } + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void 
KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + + const auto input_rank = _ctx.at(input_index).shape().rank(); + // TODO Currently we are not handling where the case is that the input's rank is 3. + // The handling should be added in the future. + assert(input_rank != 3); + + const auto output_size = + _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1); + UNUSED_RELEASE(output_size); + assert(_ctx.at(bias_index).shape().dim(0) == output_size); + assert(_ctx.at(weight_index).shape().dim(0) == output_size); + const auto batch_size = + _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2); + const auto input_size = + _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1); + + // Check for reshaping input's shape into rank-2 + bool needs_reshape = false; + ir::Shape reshape(2); + if (input_rank == 4) + { + const auto feature_size = _ctx.at(input_index).shape().num_elements(); + + UNUSED_RELEASE(feature_size); + assert(batch_size >= 0 && input_size >= 0); + assert(feature_size == static_cast<uint64_t>(batch_size) * static_cast<uint64_t>(input_size)); + + // for reshaping + needs_reshape = true; + reshape.dim(0) = batch_size; /* H */ + reshape.dim(1) = input_size; /* W */ + } + + const auto activation = node.param().activation; + + auto output_alloc = _tensor_builder->at(output_index).get(); + const auto input_alloc = _tensor_builder->at(input_index).get(); + const auto weight_alloc = _tensor_builder->at(weight_index).get(); + const auto bias_alloc = _tensor_builder->at(bias_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto acl_layout 
= output_alloc->handle()->info()->data_layout(); + + auto fn = nnfw::cpp14::make_unique<arm_compute::CLFullyConnectedReshapingLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure( + input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(), + needs_reshape, + ::neurun::backend::acl_common::asTensorShape( + reshape, frontend_layout, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout))); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, output_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Mul &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>(); + + fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::ReduceSum &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)}; + const auto &axes{node.param().axes}; + const auto keep_dims{node.param().keep_dims}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = 
_tensor_builder->at(input_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = input_alloc->layout(); + + // Convert to ACL axes taking into account negative values and possible duplicates. + std::set<std::uint32_t> acl_axes; + const int input_rank = node.param().rank; + for (int axis : axes) + { + if (axis < 0) + axis += input_rank; + acl_axes.insert( + acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value()); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(input_alloc->handle(), output_alloc->handle(), acl_axes, keep_dims, + ::arm_compute::ReduceOperation::SUM); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Reshape &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + // NOTE This operation must not be changed the layout from frontend to backend + // So, PermutationOperationPass makes layouts of frontend and backend the same. 
+ const auto frontend_layout = _current_subg_layout; + const auto backend_layout = output_alloc->layout(); + assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || + frontend_layout == backend_layout); + UNUSED_RELEASE(frontend_layout); + UNUSED_RELEASE(backend_layout); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReshapeLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Squeeze &node) +{ + // Squeeze is identical to reshape except that it has an optional dimensions input. + // In addition, optional dims_index is ignored since output tensor already has squeezed shape + // by freezer and toco + // TODO Support multi-layout for frontend and backend + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; + const auto dims{node.param().dims}; + const auto ndim{node.param().ndim}; + (void)dims; + (void)ndim; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + auto fn = nnfw::cpp14::make_unique<arm_compute::CLReshapeLayer>(); + fn->configure(input_alloc->handle(), output_alloc->handle()); + auto acl_fn = asAclFunction(std::move(fn)); + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Tanh &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<arm_compute::CLActivationLayer>(); + + const ::arm_compute::ActivationLayerInfo act_info{ + 
::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Softmax &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)}; + + const auto beta = node.param().beta; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(input_alloc->handle(), output_alloc->handle(), beta); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Slice &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)}; + const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; + const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; + + auto outputData_alloc = _tensor_builder->at(output_index).get(); + auto inputData_alloc = _tensor_builder->at(input_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = inputData_alloc->layout(); + + // Set initializers for indices data such as order of inputData + int input_rank = node.param().rank; + std::vector<int32_t> starts; + std::vector<int32_t> ends; + starts.resize(input_rank, 0); + ends.resize(input_rank, 0); + { + auto beginData_base = _ctx.at(begins_index).data().base(); + auto sizeData_base = _ctx.at(sizes_index).data().base(); + const int beginData_size = _ctx.at(begins_index).shape().num_elements(); + const int 
sizeData_size = _ctx.at(sizes_index).shape().num_elements(); + + using ir::DataType; + + UNUSED_RELEASE(beginData_size); + UNUSED_RELEASE(sizeData_size); + + assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32); + assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32); + assert(beginData_size == input_rank); + assert(sizeData_size == input_rank); + + assert(beginData_base != nullptr); + for (int n = 0; n < input_rank; ++n) + { + auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout, + backend_layout) + .value(); + + int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n); + starts[axis] = begin_value; + + int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n); + ends[axis] = begin_value + size_value; + } + } + + ::arm_compute::Coordinates starts_set; + ::arm_compute::Coordinates ends_set; + + for (size_t i = 0; i < starts.size(); ++i) + { + starts_set.set(i, starts[i]); + ends_set.set(i, ends[i]); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSlice>(); + + fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::StridedSlice &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; + const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)}; + const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; + const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; + + auto outputData_alloc = _tensor_builder->at(output_index).get(); + auto inputData_alloc = _tensor_builder->at(input_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = 
inputData_alloc->layout(); + + // Set initializers for indices data such as order of inputData + int input_rank = node.param().rank; + std::vector<int32_t> starts; + std::vector<int32_t> ends; + std::vector<int32_t> strides; + starts.resize(input_rank, 0); + ends.resize(input_rank, 0); + strides.resize(input_rank, 0); + { + auto startData_base = _ctx.at(starts_index).data().base(); + auto endData_base = _ctx.at(ends_index).data().base(); + auto stridesData_base = _ctx.at(strides_index).data().base(); + const int startData_size = _ctx.at(starts_index).shape().num_elements(); + const int endData_size = _ctx.at(ends_index).shape().num_elements(); + const int stridesData_size = _ctx.at(strides_index).shape().num_elements(); + + using ir::DataType; + + UNUSED_RELEASE(startData_size); + UNUSED_RELEASE(endData_size); + UNUSED_RELEASE(stridesData_size); + + assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32); + assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32); + assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32); + assert(startData_size == input_rank); + assert(endData_size == input_rank); + assert(stridesData_size == input_rank); + + assert(startData_base != nullptr); + for (int n = 0; n < input_rank; ++n) + { + auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout, + backend_layout) + .value(); + + int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n); + starts[axis] = start_value; + + int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n); + ends[axis] = end_value; + + int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n); + strides[axis] = strides_value; + } + } + + // Set mask bits such as order of inputData + const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank, + frontend_layout, backend_layout); + const auto end_mask = 
acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank, + frontend_layout, backend_layout); + const auto shrink_axis_mask = acl_common::ReorderBits<int32_t>( + node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout); + + ::arm_compute::Coordinates starts_set; + ::arm_compute::Coordinates ends_set; + ::arm_compute::BiStrides strides_set; + + for (size_t i = 0; i < starts.size(); ++i) + { + starts_set.set(i, starts[i]); + ends_set.set(i, ends[i]); + strides_set.set(i, strides[i]); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStridedSlice>(); + + fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set, + strides_set, begin_mask, end_mask, shrink_axis_mask); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Transpose &node) +{ + const auto ofm_idx{node.getOutputs().at(0)}; + const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; + const auto &perm{node.param().perm}; + + const auto rank = node.param().rank; + + auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); + auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + + std::vector<std::int32_t> pv(perm.cbegin(), perm.cend()); + // Reversed + auto backend_pv = ::neurun::backend::acl_common::getARMComputePermutationVector( + rank, pv, frontend_layout, backend_layout); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermute>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Add &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; + const auto 
rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>(); + + fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + arm_compute::ConvertPolicy::SATURATE); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Sub &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtraction>(); + + fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + arm_compute::ConvertPolicy::SATURATE); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Div &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); 
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticDivision>(); + + fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Exp &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLExpLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::InstanceNorm &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; + const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; + const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto gamma_alloc = _tensor_builder->at(gamma_index).get(); + auto beta_alloc = _tensor_builder->at(beta_index).get(); + auto epsilon = node.param().epsilon; + auto activation = node.param().activation; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(), + beta_alloc->handle(), epsilon); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + 
ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Logistic &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LogicalAnd &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input0_alloc = _tensor_builder->at(input0_index).get(); + auto input1_alloc = _tensor_builder->at(input1_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>(); + + fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), + ::arm_compute::BinaryLogicalOperation::AND); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LSTM &node) +{ + // TODO Support dynamic rnn + // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. 
+ const auto scratch_buffer_index{ + node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; + const auto output_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; + const auto cell_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; + const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; + + const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; + const auto input_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional + const auto input_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; + const auto input_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; + const auto input_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + const auto recurrent_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional + const auto recurrent_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; + const auto recurrent_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; + const auto recurrent_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + const auto cell_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional + const auto cell_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional + const auto cell_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional + const auto input_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; + 
const auto forget_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; + const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; + const auto output_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; + const auto projection_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional + const auto projection_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional + const auto output_state_in_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; + const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; + const auto cell_threshold = node.param().cell_threshold; + const auto projection_threshold = node.param().projection_threshold; + + bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; + bool has_recurrent_to_input_weights = + _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; + bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; + bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && + _ctx.at(projection_weights_index).shape().dim(1) != 0; + bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); + + // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG. + // true: no CIFG + // false: CIFG + // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG). 
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; + + // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole. + // But the cell_to_input_weights does not exist in regular CIFG although peephole. + // true: peephole + // false: no peephole + bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; + + // NOTE Although the projection weights has data the projection bias may not have data. + bool has_projection_param = has_projection_weights; + + const auto activation = node.param().activation; + const auto cell_clip = cell_threshold; + const auto projection_clip = projection_threshold; + assert(cell_clip >= 0.f && projection_clip >= 0.f); + + auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get(); + auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get(); + auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get(); + auto output_alloc = _tensor_builder->at(output_index).get(); + + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get(); + auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get(); + auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get(); + auto recurrent_to_forget_weights_alloc = + _tensor_builder->at(recurrent_to_forget_weights_index).get(); + auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get(); + auto recurrent_to_output_weights_alloc = + _tensor_builder->at(recurrent_to_output_weights_index).get(); + + auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get(); + auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get(); + auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get(); + auto output_state_in_alloc = 
_tensor_builder->at(output_state_in_index).get(); + auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get(); + + auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLLSTMLayer>(); + + ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{}; + if (has_cifg_param) + { + auto input_to_input_weights_alloc = + _tensor_builder->at(input_to_input_weights_index).get(); // optional + auto recurrent_to_input_weights_alloc = + _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional + auto cell_to_input_weights_handle = + has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle() + : nullptr; // optional (non-cifg && peephole) + auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional + lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(), + recurrent_to_input_weights_alloc->handle(), + cell_to_input_weights_handle, input_gate_bias_alloc->handle()); + } + if (has_peephole_param) + { + auto cell_to_forget_weights_alloc = + _tensor_builder->at(cell_to_forget_weights_index).get(); // optional + auto cell_to_output_weights_alloc = + _tensor_builder->at(cell_to_output_weights_index).get(); // optional + lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(), + cell_to_output_weights_alloc->handle()); + } + if (has_projection_param) + { + auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional + auto projection_bias_handle = has_projection_bias + ? 
_tensor_builder->at(projection_bias_index).get()->handle() + : nullptr; // optional + lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle); + } + + fn->configure( + input_alloc->handle(), input_to_forget_weights_alloc->handle(), + input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(), + recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(), + recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(), + cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(), + cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(), + output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(), + lstm_params, act_info, cell_clip, projection_clip); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReduceMax &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)}; + const auto &axes{node.param().axes}; + const auto keep_dims{node.param().keep_dims}; + + auto ofm_alloc = _tensor_builder->at(output_index).get(); + auto ifm_alloc = _tensor_builder->at(input_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + + // Convert to ACL axes taking into account negative values and possible duplicates. 
+ std::set<std::uint32_t> acl_axes; + const int ifm_rank = node.param().rank; + for (int axis : axes) + { + if (axis < 0) + axis += ifm_rank; + acl_axes.insert( + acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value()); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), acl_axes, keep_dims, + arm_compute::ReduceOperation::MAX); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Comparison &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; + + const auto comparison_type = node.param().comparison_type; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input0_alloc = _tensor_builder->at(input0_index).get(); + auto input1_alloc = _tensor_builder->at(input1_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparison>(); + + fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), + (arm_compute::ComparisonOperation)comparison_type); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Pack &node) +{ + const auto output_index{node.getOutputs().at(0)}; + auto axis{node.param().axis}; + + const auto output_rank = node.param().rank; + + std::vector<ir::OperandIndex> input_indexes; + for (const auto &input_index : node.getInputs()) + input_indexes.emplace_back(input_index); + + auto output = _tensor_builder->at(output_index).get()->handle(); + std::vector<arm_compute::ICLTensor *> inputs; + for (const auto &input_index : 
input_indexes) + inputs.emplace_back(_tensor_builder->at(input_index)->handle()); + + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = _tensor_builder->at(output_index).get()->layout(); + + if (axis < 0) + axis += output_rank; + axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStackLayer>(); + + fn->configure(inputs, axis, output); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::Permute &node) +{ + const auto ofm_idx{node.getOutputs().at(0)}; + const auto ifm_idx{node.getInputs().at(0)}; + const auto permute_type = node.getPermuteType(); + auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); + auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + const auto rank = _ctx.at(ofm_idx).shape().rank(); + assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank()); + + std::unique_ptr<::arm_compute::IFunction> fn; + arm_compute::PermutationVector pv; + if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4) + { + // WHCN -> CWHN + pv = arm_compute::PermutationVector{2, 0, 1}; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLPermute>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + + fn = std::move(l); + } + else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4) + { + // CWHN -> WHCN + pv = arm_compute::PermutationVector{1, 2, 0}; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLPermute>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + + fn = std::move(l); + } + else + { + auto l = nnfw::cpp14::make_unique<::arm_compute::CLCopy>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + } + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const 
ir::operation::RSQRT &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLRsqrtLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::ReLU &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<arm_compute::CLActivationLayer>(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLScale>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), + ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, + ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReLU1 &node) +{ + const auto 
ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReLU6 &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::RNN &node) +{ + const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)}; + const auto hidden_state_out_index{ + node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)}; + + const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)}; + const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)}; + const auto recurrent_weights_index{ + node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)}; + const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)}; + const auto 
hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)}; + + const auto activation = node.param().activation; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get(); + + auto input_alloc = _tensor_builder->at(input_index).get(); + auto weights_alloc = _tensor_builder->at(weights_index).get(); + auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get(); + auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation); + + auto copy_layer = nnfw::cpp14::make_unique<::arm_compute::CLCopy>(); + copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle()); + _execution_builder->append(asAclFunction(std::move(copy_layer))); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLRNNLayerEx>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(), + bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(), + act_info); + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::Floor &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLFloor>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) +{ + const auto 
ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; + const auto block_size_index{ + node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; + const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto block_size_alloc = _tensor_builder->at(block_size_index).get(); + auto paddings_alloc = _tensor_builder->at(paddings_index).get(); + + assert(_ctx.at(block_size_index).isConstant()); + assert(_ctx.at(paddings_index).isConstant()); + + std::unique_ptr<::arm_compute::IFunction> fn; + if (_ctx.at(ofm_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM) + { + // NOTE CLSpaceToBatchLayer has a bug that padding's values are 0 even when zero point of + // QASYMM8 is not 0. + auto l = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchND>(); + l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(), + ofm_alloc->handle()); + fn = std::move(l); + } + else + { + auto l = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchLayer>(); + l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(), + ofm_alloc->handle()); + fn = std::move(l); + } + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; + + auto block_size = node.param().block_size; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size); + + auto 
acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::L2Pool2D &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + + uint32_t kw = node.param().kw; + uint32_t kh = node.param().kh; + const auto stride = node.param().stride; + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{ + ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh}, + ::neurun::backend::acl_common::asPadStrideInfo(padding, stride)}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto lookups_alloc = _tensor_builder->at(lookups_index).get(); + auto values_alloc = _tensor_builder->at(values_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>(); + + fn->configure(values_alloc->handle(), 
output_alloc->handle(), lookups_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::L2Normalization &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)}; + + // {CL|Neon}L2Normalization performs the reduction only along dimension 0 + // L2 Normalization always performs the reduction along the depth axis + // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by + // choosing normalization parameters as below + + const auto &ifm_shape = _ctx.at(ifm_index).shape(); + // TODO Support optional constant dimension that normalization would be performed on + const auto normalization_axis = node.param().rank - 1; + int32_t radius = + 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1 + float alpha = 1.0f; // In the implementation to make alpha_ become 1 + float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) + float bias = 0.0f; // Don't offset the reduction. 
+ + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, + radius, alpha, beta, bias, false); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::HashtableLookup &node) +{ + const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)}; + const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)}; + + const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)}; + const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; + const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto hits_alloc = _tensor_builder->at(hits_index).get(); + + auto lookups_alloc = _tensor_builder->at(lookups_index).get(); + auto keys_alloc = _tensor_builder->at(keys_index).get(); + auto values_alloc = _tensor_builder->at(values_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>(); + + fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(), + output_alloc->handle(), hits_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::PReLU &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; + const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; + + auto ofm_alloc = 
_tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto alpha_alloc = _tensor_builder->at(alpha_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>(); + + fn->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::TransposeConv &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto output_shape_index{ + node.getInputs().at(ir::operation::TransposeConv::Input::OUTPUT_SHAPE)}; + const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; + const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout); + + const auto stride = node.param().stride; + + assert((node.param().padding.type == ir::PaddingType::SAME) || + (node.param().padding.type == ir::PaddingType::VALID)); + auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride, + ker_shape.W, ker_shape.H); + + uint32_t invalid_horizontal = 0; + uint32_t invalid_vertical = 0; + if (node.param().padding.type == ir::PaddingType::VALID) + { + invalid_horizontal = + ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1); + invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); + } + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ker_alloc = _tensor_builder->at(ker_index).get(); + + const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); + + auto fn = 
nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info, + invalid_horizontal, invalid_vertical); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::SQRT &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LogicalOr &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input0_alloc = _tensor_builder->at(input0_index).get(); + auto input1_alloc = _tensor_builder->at(input1_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseOr>(); + + fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LogicalNot &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto 
input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseNot>(); + + fn->configure(input_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::SquaredDifference &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseSquaredDiff>(); + + fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::TopKV2 &node) +{ + const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)}; + const auto outputIndices_index{ + node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)}; + + const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)}; + + // Currently, we only support the vector input. 
+ assert(_ctx.at(inputData_index).shape().rank() == 1 || + _ctx.at(inputData_index).shape().rank() == 2); + + const auto k = node.param().k; + + auto values_alloc = _tensor_builder->at(outputValues_index).get(); + auto indices_alloc = _tensor_builder->at(outputIndices_index).get(); + auto input_alloc = _tensor_builder->at(inputData_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>(); + + fn->configure(input_alloc->handle(), k, values_alloc->handle(), indices_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Gather &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape(); + + const auto ifm_rank = node.param().rank; + const auto axis_raw = node.param().axis; + const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw); + const int axis = ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto indices_alloc = _tensor_builder->at(indices_index).get(); + + // NOTE The frontend layout and backend layout must be the same for this operation. + // If not the same, we have to add a stage(?) to perform permutation of output tensor. It + // is not not efficient even if it works well. If so, it would be better to set the + // layout of these backend tensors to the same layout. + // There is also one thing we have to think about. This operation depends on the layout of + // a model. 
For example, if a model in NHWC has this operation as output rank == 4, indices + // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W + // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. + const auto backend_layout = ofm_alloc->layout(); + UNUSED_RELEASE(backend_layout); + assert(backend_layout == ifm_alloc->layout()); + assert(backend_layout == indices_alloc->layout()); + assert(ifm_rank < 4 || _current_subg_layout == backend_layout); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLGatherEx>(); + + fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Neg &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNeg>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Abs &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + 
_execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ArgMax &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + + assert((ifm_shape.rank() - 1) == ofm_shape.rank()); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + const auto ifm_rank = node.param().rank; + auto frontend_layout = _current_subg_layout; + auto backend_layout = ifm_alloc->layout(); + + int axis_value = node.param().axis; + if (axis_value < 0) + { + axis_value += ifm_rank; + } + + auto acl_axis = + acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArgOperation>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), {acl_axis}, + ::arm_compute::ArgOperation::MAX); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Dequantize &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); + + fn->configure(input_alloc->handle(), output_alloc->handle(), arm_compute::SubDataType::NONE); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Mean &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)}; + const auto &axes{node.param().axes}; + const auto 
keep_dims{node.param().keep_dims}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + + // Convert to ACL axes taking into account negative values and possible duplicates. + std::set<std::uint32_t> acl_axes; + const int ifm_rank = node.param().rank; + for (int axis : axes) + { + if (axis < 0) + axis += ifm_rank; + acl_axes.insert( + acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value()); + } + + arm_compute::Coordinates reduce_axes; + for (const auto axis : acl_axes) + { + reduce_axes.set(reduce_axes.num_dimensions(), axis); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceMean>(); + + fn->configure(ifm_alloc->handle(), reduce_axes, keep_dims, ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{ + node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)}; + + auto radius = node.param().radius; + auto alpha = node.param().alpha; + auto beta = node.param().beta; + auto bias = node.param().bias; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const auto norm_info = ::arm_compute::NormalizationLayerInfo( + ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::DepthToSpace &node) +{ + const auto 
output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + + auto block_size = node.param().block_size; + assert(block_size > 0); + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>(); + + fn->configure(input_alloc->handle(), output_alloc->handle(), block_size); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReduceMin &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)}; + const auto &axes{node.param().axes}; + const auto keep_dims{node.param().keep_dims}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + + // Convert to ACL axes taking into account negative values and possible duplicates. 
+ std::set<std::uint32_t> acl_axes; + const int ifm_rank = node.param().rank; + for (int axis : axes) + { + if (axis < 0) + axis += ifm_rank; + acl_axes.insert( + acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value()); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), acl_axes, keep_dims, + ::arm_compute::ReduceOperation::MIN); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Split &node) +{ + const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)}; + + assert(node.param().num_splits == static_cast<int>(node.getOutputs().size())); + + const auto ifm_rank = node.param().rank; + std::vector<ir::OperandIndex> output_indexes; + for (const auto &output : node.getOutputs()) + output_indexes.emplace_back(output); + + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + std::vector<arm_compute::ICLTensor *> output_allocs; + for (const auto &ofm_ind : output_indexes) + output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + auto axis = node.param().axis; + if (axis < 0) + axis += ifm_rank; + axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSplit>(); + + fn->configure(ifm_alloc->handle(), output_allocs, axis); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::Unpack &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)}; + auto axis{node.param().axis}; + + const auto input_rank = node.param().rank; + + std::vector<ir::OperandIndex> 
output_indexes; + for (const auto &output_index : node.getOutputs()) + output_indexes.emplace_back(output_index); + + auto input = _tensor_builder->at(input_index).get()->handle(); + std::vector<arm_compute::ICLTensor *> outputs; + for (const auto &output_index : output_indexes) + outputs.emplace_back(_tensor_builder->at(output_index)->handle()); + + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + if (axis < 0) + axis += input_rank; + axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLUnstack>(); + + fn->configure(input, outputs, axis); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::Pad &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; + const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)}; + const auto output_index{node.getOutputs().at(0)}; + assert(_ctx.at(pad_index).isConstant()); + + auto rank = node.param().rank; + auto pad_base = _ctx.at(pad_index).data().base(); + + auto input_type = _ctx.at(input_index).typeInfo(); + auto data_type = acl_common::asDataType(input_type.type()); + auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()); + const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info); + + auto input = _tensor_builder->at(input_index).get()->handle(); + auto output = _tensor_builder->at(output_index).get()->handle(); + + ::arm_compute::PaddingList padding_list; + padding_list.resize(rank); + for (int32_t n = 0; n < rank; ++n) + { + const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2); + + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto axis = + 
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); + padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]}; + } + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayer>(); + fn->configure(input, output, padding_list, pixel_value); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::Min &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseMin>(); + + fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Max &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseMax>(); + + fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +} // namespace acl_cl +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_cl/KernelGenerator.h b/runtime/neurun/backend/acl_cl/KernelGenerator.h new file mode 100644 index 000000000..a577f1ebc --- /dev/null +++ 
b/runtime/neurun/backend/acl_cl/KernelGenerator.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ +#define __NEURUN_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ + +#include <backend/IKernelGenerator.h> + +#include "ir/Operands.h" +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +class KernelGenerator : public IKernelGenerator +{ +public: + KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::MaxPool2D &) override; + void visit(const ir::operation::AvgPool2D &) override; + void visit(const ir::operation::Concat &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Mul &) override; + void visit(const ir::operation::ReduceSum &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Tanh &) override; + void visit(const ir::operation::Softmax &) override; + void visit(const ir::operation::Slice &) override; + void 
visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::Add &) override; + void visit(const ir::operation::Sub &) override; + void visit(const ir::operation::Cast &) override; + void visit(const ir::operation::Div &) override; + void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::InstanceNorm &) override; + void visit(const ir::operation::Logistic &) override; + void visit(const ir::operation::ReduceMax &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::LogicalAnd &) override; + void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::RSQRT &) override; + void visit(const ir::operation::ReLU &) override; + void visit(const ir::operation::ResizeBilinear &) override; + void visit(const ir::operation::ReLU1 &) override; + void visit(const ir::operation::ReLU6 &) override; + void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::Floor &) override; + void visit(const ir::operation::SpaceToBatchND &) override; + void visit(const ir::operation::SpaceToDepth &) override; + void visit(const ir::operation::L2Pool2D &) override; + void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::HashtableLookup &) override; + void visit(const ir::operation::PReLU &) override; + void visit(const ir::operation::TransposeConv &) override; + void visit(const ir::operation::SQRT &) override; + void visit(const ir::operation::LogicalOr &) override; + void visit(const ir::operation::LogicalNot &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::TopKV2 &) override; + void visit(const ir::operation::Gather 
&) override; + void visit(const ir::operation::Neg &) override; + void visit(const ir::operation::Abs &) override; + void visit(const ir::operation::ArgMax &) override; + void visit(const ir::operation::Dequantize &) override; + void visit(const ir::operation::Mean &) override; + void visit(const ir::operation::LocalResponseNormalization &) override; + void visit(const ir::operation::DepthToSpace &) override; + void visit(const ir::operation::ReduceMin &) override; + void visit(const ir::operation::Split &) override; + void visit(const ir::operation::Unpack &) override; + void visit(const ir::operation::Pad &) override; + void visit(const ir::operation::Min &) override; + void visit(const ir::operation::Max &) override; + +private: + const ir::Operands &_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + ir::Layout _current_subg_layout; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ diff --git a/runtime/neurun/backend/acl_cl/PluginClassesAllocator.cc b/runtime/neurun/backend/acl_cl/PluginClassesAllocator.cc new file mode 100644 index 000000000..ac3f0acff --- /dev/null +++ b/runtime/neurun/backend/acl_cl/PluginClassesAllocator.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <util/logging.h> + +#include "Backend.h" + +extern "C" { +neurun::backend::Backend *neurun_backend_create() +{ + VERBOSE(neurun_backend_create) << "'acl_cl' loaded\n"; + return new neurun::backend::acl_cl::Backend; +} + +void neurun_backend_destroy(neurun::backend::Backend *backend) +{ + VERBOSE(neurun_backend_create) << "'acl_cl' unloaded\n"; + delete backend; +} +} diff --git a/runtime/neurun/backend/acl_cl/ShapeFixer.cc b/runtime/neurun/backend/acl_cl/ShapeFixer.cc new file mode 100644 index 000000000..e1cbeab6c --- /dev/null +++ b/runtime/neurun/backend/acl_cl/ShapeFixer.cc @@ -0,0 +1,434 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ShapeFixer.h" + +#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions +#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions + +#include <AclFunction.h> +#include <Convert.h> +#include <Swizzle.h> + +#include "ir/Index.h" +#include "compiler/IExecutionBuilder.h" +#include "exec/NopFunction.h" +#include "util/logging.h" +#include "util/Utils.h" +#include "util/Padding.h" + +using ::neurun::compiler::IExecutionBuilder; + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +using ::neurun::backend::acl_common::asAclFunction; + +ShapeFixer::ShapeFixer(const ir::Operands &ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _ctx(ctx), _tensor_builder(tensor_builder) +{ + assert(tensor_builder); +} + +void ShapeFixer::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); +} + +void ShapeFixer::visit(const ir::operation::Cast &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Concat &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + _tensor_builder->dimCorrection(ofm_index, false); + for (const auto &input : node.getInputs()) + _tensor_builder->dimCorrection(input, false); +} + +void ShapeFixer::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto 
input_rank = _ctx.at(input_index).shape().rank(); + if (input_rank == 4) + _tensor_builder->dimCorrection(input_index, false); +} + +void ShapeFixer::visit(const ir::operation::Mul &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::ReduceSum &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Reshape &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; + _tensor_builder->dimCorrection(input_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::Squeeze &node) +{ + const auto output_index{node.getOutputs().at(0)}; + if (_ctx.at(output_index).shape().rank() == 0) + const_cast<ir::Shape &>(_ctx.at(output_index).shape()).extendRank(1); + const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; + _tensor_builder->dimCorrection(input_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::Tanh &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Slice &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::StridedSlice &node) +{ + 
const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); +} + +void ShapeFixer::visit(const ir::operation::Transpose &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Add &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Sub &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Div &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Exp &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::InstanceNorm &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::LogicalAnd &node) +{ + const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; + + if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank()); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::LSTM &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReduceMax &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Comparison &node) +{ + const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; + + if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Pack &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + _tensor_builder->dimCorrection(ofm_index, false); + for (const auto &inputs : node.getInputs()) + { + _tensor_builder->dimCorrection(inputs, false); + const auto ofm_rank = _ctx.at(ofm_index).shape().rank(); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(inputs).shape()).extendRank(ofm_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::RSQRT &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReLU &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ResizeBilinear &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReLU1 &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReLU6 &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::RNN &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Floor &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::SpaceToBatchND &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); +} + +void ShapeFixer::visit(const ir::operation::SpaceToDepth &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); +} + +void ShapeFixer::visit(const ir::operation::L2Pool2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + _tensor_builder->dimCorrection(values_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::L2Normalization &) { /* DO NOTHING */} + +void ShapeFixer::visit(const 
ir::operation::HashtableLookup &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::PReLU &node) +{ + const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; + const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; + + if (!(_ctx.at(ifm_index).shape() == _ctx.at(alpha_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(ifm_index).shape().rank(), _ctx.at(alpha_index).shape().rank()); + const_cast<ir::Shape &>(_ctx.at(ifm_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(alpha_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::TransposeConv &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::SQRT &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::LogicalOr &node) +{ + const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; + + if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank()); + const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::LogicalNot &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::SquaredDifference &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + const_cast<ir::Shape 
&>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::TopKV2 &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Gather &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); + _tensor_builder->dimCorrection(indices_index, false); +} + +void ShapeFixer::visit(const ir::operation::Neg &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Abs &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ArgMax &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); +} + +void ShapeFixer::visit(const ir::operation::Dequantize &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Mean &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::LocalResponseNormalization &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::DepthToSpace &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReduceMin &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Split &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)}; + _tensor_builder->dimCorrection(input_index, false); + for (const auto &output : node.getOutputs()) + _tensor_builder->dimCorrection(output, false); +} + +void ShapeFixer::visit(const ir::operation::Unpack &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)}; + 
_tensor_builder->dimCorrection(input_index, false); + for (const auto &output_index : node.getOutputs()) + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::Pad &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + _tensor_builder->dimCorrection(input_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::Min &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Max &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +} // namespace acl_cl +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_cl/ShapeFixer.h b/runtime/neurun/backend/acl_cl/ShapeFixer.h new file mode 100644 index 000000000..ec5f5c896 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/ShapeFixer.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_CL_SHAPE_FIXER_H__ +#define __NEURUN_BACKEND_ACL_CL_SHAPE_FIXER_H__ + +#include <backend/IShapeFixer.h> + +#include "ir/Operands.h" +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +class ShapeFixer : public IShapeFixer +{ +public: + ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder); + + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::MaxPool2D &) override; + void visit(const ir::operation::AvgPool2D &) override; + void visit(const ir::operation::Concat &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Mul &) override; + void visit(const ir::operation::ReduceSum &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Tanh &) override; + void visit(const ir::operation::Softmax &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::Add &) override; + void visit(const ir::operation::Sub &) override; + void visit(const ir::operation::Cast &) override; + void visit(const ir::operation::Div &) override; + void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::InstanceNorm &) override; + void visit(const ir::operation::Logistic &) override; + void visit(const ir::operation::ReduceMax &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::LogicalAnd &) override; + void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::Pack &) override; + void visit(const 
ir::operation::Permute &) override; + void visit(const ir::operation::RSQRT &) override; + void visit(const ir::operation::ReLU &) override; + void visit(const ir::operation::ResizeBilinear &) override; + void visit(const ir::operation::ReLU1 &) override; + void visit(const ir::operation::ReLU6 &) override; + void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::Floor &) override; + void visit(const ir::operation::SpaceToBatchND &) override; + void visit(const ir::operation::SpaceToDepth &) override; + void visit(const ir::operation::L2Pool2D &) override; + void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::HashtableLookup &) override; + void visit(const ir::operation::PReLU &) override; + void visit(const ir::operation::TransposeConv &) override; + void visit(const ir::operation::SQRT &) override; + void visit(const ir::operation::LogicalOr &) override; + void visit(const ir::operation::LogicalNot &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::TopKV2 &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::Neg &) override; + void visit(const ir::operation::Abs &) override; + void visit(const ir::operation::ArgMax &) override; + void visit(const ir::operation::Dequantize &) override; + void visit(const ir::operation::Mean &) override; + void visit(const ir::operation::LocalResponseNormalization &) override; + void visit(const ir::operation::DepthToSpace &) override; + void visit(const ir::operation::ReduceMin &) override; + void visit(const ir::operation::Split &) override; + void visit(const ir::operation::Unpack &) override; + void visit(const ir::operation::Pad &) override; + void visit(const ir::operation::Min &) override; + void visit(const ir::operation::Max &) override; + +private: + const ir::Operands &_ctx; + 
std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_SHAPE_FIXER_H__ diff --git a/runtime/neurun/backend/acl_cl/TensorBuilder.h b/runtime/neurun/backend/acl_cl/TensorBuilder.h new file mode 100644 index 000000000..b9a0dd4a6 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/TensorBuilder.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_CL_TENSOR_BUILDER_H__ +#define __NEURUN_BACKEND_ACL_CL_TENSOR_BUILDER_H__ + +#include <TemplTensorBuilder.h> + +#include "operand/CLTensor.h" +#include "operand/CLSubTensor.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +using TensorBuilder = + acl_common::TemplTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_TENSOR_BUILDER_H__ diff --git a/runtime/neurun/backend/acl_cl/TensorManager.h b/runtime/neurun/backend/acl_cl/TensorManager.h new file mode 100644 index 000000000..fd2a9059c --- /dev/null +++ b/runtime/neurun/backend/acl_cl/TensorManager.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_CL_TENSOR_MANAGER_H__ +#define __NEURUN_BACKEND_ACL_CL_TENSOR_MANAGER_H__ + +#include <arm_compute/runtime/CL/CLBufferAllocator.h> +#include <arm_compute/runtime/PoolManager.h> +#include <arm_compute/runtime/BlobLifetimeManager.h> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> +#include <arm_compute/runtime/CL/CLMemoryGroup.h> + +#include <AclMemoryManager.h> +#include <AclLinearMemoryManager.h> +#include <AclInternalBufferManager.h> +#include <AclTensorManager.h> + +#include "operand/CLTensor.h" +#include "operand/CLSubTensor.h" + +#include "util/logging.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +using MemoryManager = + acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>; + +using LinearMemoryManager = acl_common::AclLinearMemoryManager< + operand::ICLTensor, operand::CLTensor, operand::CLSubTensor, + ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager, + ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator, + ::arm_compute::CLMemoryGroup>; + +using InternalBufferManager = acl_common::AclInternalBufferManager< + ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager, + ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>; + +using TensorManager = + acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, 
operand::CLSubTensor>; + +// Factory for the backend's tensor manager; `inline` is required because this +// is a function *definition* in a header — without it, including this header +// from more than one translation unit violates the ODR (multiple definitions). +inline TensorManager *createTensorManager() +{ + const std::string executor_str = util::getConfigString(util::config::EXECUTOR); + + if (executor_str == "Linear") + { + VERBOSE(acl_cl_createTensorManager) << "AclTensorManager as Linear" << std::endl; + return new TensorManager(new MemoryManager(), new LinearMemoryManager(), + new InternalBufferManager()); + } + else + { + VERBOSE(acl_cl_createTensorManager) << "AclTensorManager" << std::endl; + return new TensorManager(new MemoryManager(), new MemoryManager(), new InternalBufferManager()); + } +} + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_TENSOR_MANAGER_H__ diff --git a/runtime/neurun/backend/acl_cl/TensorRegister.h b/runtime/neurun/backend/acl_cl/TensorRegister.h new file mode 100644 index 000000000..02de45580 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/TensorRegister.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_CL_TENSOR_REGISTER_H__ +#define __NEURUN_BACKEND_ACL_CL_TENSOR_REGISTER_H__ + +#include <AclTensorRegister.h> +#include <misc/polymorphic_downcast.h> +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +class TensorRegister : public acl_common::AclTensorRegister +{ +public: + TensorRegister(const ir::Operands &operands, const std::shared_ptr<TensorBuilder> &tensor_builder) + : acl_common::AclTensorRegister{operands, tensor_builder} + { + // DO NOTHING + } + + void setUsesCount(const ir::OperandIndex &ind, size_t num_uses) const override + { + nnfw::misc::polymorphic_downcast<TensorBuilder *>(tensor_builder().get()) + ->setUsesCount(ind, num_uses); + } +}; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_TENSOR_REGISTER_H__ diff --git a/runtime/neurun/backend/acl_cl/operand/CLSubTensor.cc b/runtime/neurun/backend/acl_cl/operand/CLSubTensor.cc new file mode 100644 index 000000000..70c8829d9 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/operand/CLSubTensor.cc @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CLSubTensor.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ +namespace operand +{ + +CLSubTensor::CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape, + const arm_compute::Coordinates &coords, size_t rank, bool extend_parent) + : _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(parent->handle(), tensor_shape, + coords, extend_parent)), + _rank{rank} +{ + // DO NOTHING +} + +const arm_compute::CLSubTensor *CLSubTensor::handle() const { return _cl_sub_tensor.get(); } + +arm_compute::CLSubTensor *CLSubTensor::handle() { return _cl_sub_tensor.get(); } + +} // namespace operand +} // namespace acl_cl +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_cl/operand/CLSubTensor.h b/runtime/neurun/backend/acl_cl/operand/CLSubTensor.h new file mode 100644 index 000000000..8eba3760f --- /dev/null +++ b/runtime/neurun/backend/acl_cl/operand/CLSubTensor.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__ +#define __NEURUN_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__ + +#include <arm_compute/runtime/CL/CLSubTensor.h> +#include "ICLTensor.h" +#include "compiler/SubTensorInfo.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ +namespace operand +{ + +class CLSubTensor : public ICLTensor +{ +public: + CLSubTensor() = delete; + +public: + CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape, + const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false); + +public: + size_t num_dimensions() const final { return _rank; } + +public: + const arm_compute::CLSubTensor *handle() const override; + arm_compute::CLSubTensor *handle() override; + +public: + // This method is used to prevent the use of memcpy for SubTensor + bool has_padding() const override { return true; } + +private: + std::shared_ptr<arm_compute::CLSubTensor> _cl_sub_tensor; + size_t _rank; +}; + +} // namespace operand +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__ diff --git a/runtime/neurun/backend/acl_cl/operand/CLTensor.cc b/runtime/neurun/backend/acl_cl/operand/CLTensor.cc new file mode 100644 index 000000000..dab74e65f --- /dev/null +++ b/runtime/neurun/backend/acl_cl/operand/CLTensor.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "CLTensor.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> +#include <arm_compute/runtime/CL/CLMemory.h> +#include <arm_compute/runtime/CL/CLMemoryRegion.h> + +#include <Convert.h> + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ +namespace operand +{ + +CLTensor::CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses) + : _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _rank{rank}, _num_uses{num_uses} +{ + allocator()->init(info); +} + +const arm_compute::CLTensor *CLTensor::handle() const { return _cl_tensor.get(); } + +arm_compute::CLTensor *CLTensor::handle() { return _cl_tensor.get(); } + +arm_compute::CLTensorAllocator *CLTensor::allocator() { return _cl_tensor->allocator(); } + +void CLTensor::map(bool blocking) { _cl_tensor->map(blocking); } + +void CLTensor::unmap() { _cl_tensor->unmap(); } + +void CLTensor::setBuffer(void *host_ptr) +{ + // Constructs a Buffer on a user-supplied memory + auto buffer = cl::Buffer(arm_compute::CLScheduler::get().context(), + CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, info()->total_size(), host_ptr); + // import memory + allocator()->import_memory(buffer); +} + +} // namespace operand +} // namespace acl_cl +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_cl/operand/CLTensor.h b/runtime/neurun/backend/acl_cl/operand/CLTensor.h new file mode 100644 index 000000000..8518bf0c3 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/operand/CLTensor.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__ +#define __NEURUN_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__ + +#include <arm_compute/core/TensorInfo.h> +#include <arm_compute/runtime/CL/CLTensor.h> +#include <arm_compute/runtime/CL/CLScheduler.h> +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "ICLTensor.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ +namespace operand +{ + +class CLTensor : public ICLTensor +{ +public: + CLTensor() = delete; + +public: + CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses); + +public: + size_t num_dimensions() const final { return _rank; } + +public: + const arm_compute::CLTensor *handle() const override; + arm_compute::CLTensor *handle() override; + size_t num_uses() const { return _num_uses; } + +public: + arm_compute::CLTensorAllocator *allocator(); + void map(bool blocking = true); + void unmap(); + /** Set given buffer as the buffer of the tensor + * + * @note Ownership of the memory is not transferred to this object. + * Thus management (allocate/free) should be done by the client. + * + * @param[in] host_ptr Storage to be used. 
+ */ + void setBuffer(void *host_ptr); + +private: + std::shared_ptr<arm_compute::CLTensor> _cl_tensor; + size_t _rank; + size_t _num_uses; +}; + +} // namespace operand +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__ diff --git a/runtime/neurun/backend/acl_cl/operand/ICLTensor.cc b/runtime/neurun/backend/acl_cl/operand/ICLTensor.cc new file mode 100644 index 000000000..6b14584e0 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/operand/ICLTensor.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ICLTensor.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ +namespace operand +{ + +void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn) +{ + auto &queue = ::arm_compute::CLScheduler::get().queue(); + + // This is an optional input + if (total_size() == 0) + return; + + map(queue); + fn(*this); + unmap(queue); +} +} // namespace operand +} // namespace acl_cl +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_cl/operand/ICLTensor.h b/runtime/neurun/backend/acl_cl/operand/ICLTensor.h new file mode 100644 index 000000000..68e4e7fc5 --- /dev/null +++ b/runtime/neurun/backend/acl_cl/operand/ICLTensor.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__ +#define __NEURUN_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__ + +#include <arm_compute/core/CL/ICLTensor.h> + +#include <IACLTensor.h> + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ +namespace operand +{ + +class ICLTensor : public acl_common::IACLTensor +{ +public: + const arm_compute::ICLTensor *handle() const override = 0; + arm_compute::ICLTensor *handle() override = 0; + +public: + void map(cl::CommandQueue &q, bool blocking = true) { return handle()->map(q, blocking); } + void unmap(cl::CommandQueue &q) { return handle()->unmap(q); } + void access(const std::function<void(ITensor &tensor)> &fn) final; +}; + +} // namespace operand +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__ diff --git a/runtime/neurun/backend/acl_common/AclFunction.h b/runtime/neurun/backend/acl_common/AclFunction.h new file mode 100644 index 000000000..a63f3807b --- /dev/null +++ b/runtime/neurun/backend/acl_common/AclFunction.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__ +#define __NEURUN_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__ + +#include <exec/IFunction.h> +#include <arm_compute/runtime/IFunction.h> +#include <arm_compute/runtime/CL/CLScheduler.h> +#include <memory> + +namespace neurun +{ +namespace backend +{ +namespace acl_common +{ + +class AclFunction : public ::neurun::exec::IFunction +{ +public: + AclFunction() = delete; + +public: + AclFunction(std::unique_ptr<::arm_compute::IFunction> &&func) : _func(std::move(func)) + { + // DO NOTHING + } + +public: + void run() override { _func->run(); } + void runSync() override + { + run(); + arm_compute::CLScheduler::get().sync(); + } + void prepare() override { _func->prepare(); } + +private: + std::unique_ptr<::arm_compute::IFunction> _func; +}; + +} // namespace acl_common +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__ diff --git a/runtime/neurun/backend/acl_common/AclInternalBufferManager.h b/runtime/neurun/backend/acl_common/AclInternalBufferManager.h new file mode 100644 index 000000000..b7183c86e --- /dev/null +++ b/runtime/neurun/backend/acl_common/AclInternalBufferManager.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__ +#define __NEURUN_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__ + +#include <arm_compute/runtime/IMemoryManager.h> +#include <cassert> +#include <memory> +#include <backend/IMemoryManager.h> + +namespace neurun +{ +namespace backend +{ +namespace acl_common +{ + +// NOTE. If any backend can use something like InternalBufferManager, +// this interface can be moved to core/include/backend/ +/** + * @brief Interface for InternalBufferManager which has ::arm_compute::IMemoryManager pointer + */ +struct IInternalBufferManager : public backend::IMemoryManager +{ + virtual ~IInternalBufferManager() = default; + + /** + * @brief Get shared_ptr of ::arm_compute::IMemoryManager + */ + virtual std::shared_ptr<::arm_compute::IMemoryManager> internal_buffer_manager(void) = 0; +}; + +/** + * @brief class for InternalBufferManager which has ::arm_compute::IMemoryManager pointer + */ +template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager, + typename T_Allocator> +class AclInternalBufferManager : public IInternalBufferManager +{ +public: + AclInternalBufferManager() : _allocator{nullptr} + { + std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>(); + std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>(); + + _internal_manager = std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr); + assert(_internal_manager); + } + + virtual ~AclInternalBufferManager() = default; + + /** + * @brief Allocate the internal buffer manager on acl + */ + void allocate(void) override + { + _allocator = std::make_shared<T_Allocator>(); + _internal_manager->populate(*_allocator, 1); + } + + /** + * @brief Deallocate the internal buffer manager on acl + */ + void deallocate(void) override { _internal_manager->clear(); } + + /** + * @brief Get shared_ptr of ::arm_compute::IMemoryManager + */ + std::shared_ptr<::arm_compute::IMemoryManager> 
internal_buffer_manager(void) override + { + return _internal_manager; + } + +private: + std::shared_ptr<T_Allocator> _allocator; + std::shared_ptr<T_MemoryManager> _internal_manager; +}; + +} // namespace acl_common +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__ diff --git a/runtime/neurun/backend/acl_common/AclLinearMemoryManager.h b/runtime/neurun/backend/acl_common/AclLinearMemoryManager.h new file mode 100644 index 000000000..7ed719bc3 --- /dev/null +++ b/runtime/neurun/backend/acl_common/AclLinearMemoryManager.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__ +#define __NEURUN_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__ + +#include <cassert> + +#include "AclMemoryManager.h" +#include "ir/OperandIndexMap.h" +#include "util/logging.h" + +namespace +{ + +template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager> +std::shared_ptr<T_MemoryManager> createMemoryManager() +{ + std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>(); + std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>(); + + std::shared_ptr<T_MemoryManager> mem_mgr = + std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr); + return mem_mgr; +} + +} // namespace anonymous + +namespace neurun +{ +namespace backend +{ +namespace acl_common +{ + +template <typename T_ITensor, typename T_Tensor, typename T_SubTensor, typename T_MemoryManager, + typename T_PoolManager, typename T_LifetimeManager, typename T_Allocator, + typename T_MemoryGroup> +class AclLinearMemoryManager : public AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor> +{ +public: + AclLinearMemoryManager() + : _allocator{nullptr}, + _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()}, + _io_group{std::make_shared<T_MemoryGroup>(_io_manager)} + { + // DO NOTHING + } + + virtual ~AclLinearMemoryManager() = default; + + void allocate(void) override + { + _allocator = std::make_shared<T_Allocator>(); + _io_manager->populate(*_allocator, 1); + _io_group->acquire(); + } + + void deallocate(void) override + { + _io_group->release(); + _io_manager->clear(); + } + + void startLifetime(const ir::OperandIndex &ind) override + { + auto &tensors = this->tensors(); + assert(tensors.find(ind) != tensors.end()); + + auto tensor = tensors[ind]; + assert(tensor->handle()); + + _io_group->manage(tensor->handle()); + } + + void finishLifetime(const ir::OperandIndex &ind) override + { + auto &tensors = this->tensors(); + 
assert(tensors.find(ind) != tensors.end()); + + auto tensor = tensors[ind]; + assert(tensor->allocator()); + + tensor->allocator()->allocate(); + } + +private: + std::shared_ptr<T_Allocator> _allocator; + std::shared_ptr<T_MemoryManager> _io_manager; + std::shared_ptr<T_MemoryGroup> _io_group; +}; + +} // namespace acl_common +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__ diff --git a/runtime/neurun/backend/acl_common/AclMemoryManager.h b/runtime/neurun/backend/acl_common/AclMemoryManager.h new file mode 100644 index 000000000..af7f3a460 --- /dev/null +++ b/runtime/neurun/backend/acl_common/AclMemoryManager.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ +#define __NEURUN_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ + +#include <arm_compute/core/Types.h> +#include <arm_compute/runtime/IMemoryManager.h> +#include <cassert> + +#include "backend/IMemoryManager.h" +#include "ir/OperandIndexMap.h" +#include "Convert.h" +#include "util/logging.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_common +{ + +template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> +class AclMemoryManager : public backend::IMemoryManager +{ +public: + AclMemoryManager() + { + // DO NOTHING + } + + virtual ~AclMemoryManager() = default; + + void allocate(void) override + { + for (const auto &tensor_entry : _tensors) + { + auto tensor = tensor_entry.second; + tensor->allocator()->allocate(); + } + } + + void deallocate(void) override + { + for (const auto &tensor_entry : _tensors) + { + auto tensor = tensor_entry.second; + tensor->allocator()->free(); + } + } + + virtual void startLifetime(const ir::OperandIndex &) { /* DO NOTHING */} + virtual void finishLifetime(const ir::OperandIndex &) { /* DO NOTHING */} + + void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, + size_t num_uses) + { + auto tensor = std::make_shared<T_Tensor>(info, rank, num_uses); + _tensors[ind] = tensor; + } + + void buildSubtensor(std::shared_ptr<T_ITensor> parent_tensor, const ir::OperandIndex &child_ind, + const ::arm_compute::TensorShape &shape, + const ::arm_compute::Coordinates &coordinates, size_t rank, + bool extent_parent) + { + auto subtensor = + std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent); + _subtensors[child_ind] = subtensor; + } + + ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &tensors(void) { return _tensors; } + + ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &subtensors(void) { return _subtensors; } + +private: + ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _tensors; + 
ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> _subtensors; +}; + +} // namespace acl_common +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ diff --git a/runtime/neurun/backend/acl_common/AclTensorManager.h b/runtime/neurun/backend/acl_common/AclTensorManager.h new file mode 100644 index 000000000..48a4c2599 --- /dev/null +++ b/runtime/neurun/backend/acl_common/AclTensorManager.h @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#ifndef __NEURUN_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
#define __NEURUN_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__

#include <arm_compute/runtime/IMemoryManager.h>

#include "backend/ITensorManager.h"
#include "AclMemoryManager.h"
#include "AclInternalBufferManager.h"
#include "ir/OperandIndexMap.h"

namespace neurun
{
namespace backend
{
namespace acl_common
{

/**
 * @brief Tensor manager routing each operand to one of three managers:
 *        a memory manager for constant tensors, one for non-constant
 *        tensors/sub-tensors, and an internal buffer manager for ACL's
 *        own scratch memory.
 */
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
class AclTensorManager : public backend::ITensorManager
{
public:
  using T_AclMemoryManager = AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor>;

  // Takes ownership of all three raw pointers (stored in unique_ptr members)
  AclTensorManager(T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
                   IInternalBufferManager *inter_mgr);

  virtual ~AclTensorManager() = default;

  void allocateConsts(void) override;
  void allocateNonconsts(void) override;
  void deallocateConsts(void) override;
  void deallocateNonconsts(void) override;

  void allocateInternalBufferManager(void);
  void deallocateInternalBufferManager(void);

  // Register a tensor under the const or non-const manager per `as_const`
  void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
                   bool as_const, size_t num_uses);
  // Register a sub-tensor view of `parent` (always under the non-const manager)
  void buildSubtensor(const ir::OperandIndex &parent, const ir::OperandIndex &child,
                      const ::arm_compute::TensorShape &shape,
                      const ::arm_compute::Coordinates &coordinates, size_t rank,
                      bool extent_parent);

  // Look up `ind` among non-const tensors then sub-tensors; nullptr if absent
  std::shared_ptr<T_ITensor> findTensorAsParent(const ir::OperandIndex &ind);

  void startLifetime(const ir::OperandIndex &ind);
  void finishLifetime(const ir::OperandIndex &ind);

  std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);

  ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &constTensors(void);
  ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &nonconstTensors(void);
  ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &nonconstSubtensors(void);

  std::shared_ptr<::arm_compute::IMemoryManager> internal_buffer_manager(void);

  // Apply `fn` to every registered operand index (non-const tensors,
  // then sub-tensors, then const tensors)
  void iterate(const std::function<void(const ir::OperandIndex &)> &fn);

  void tryDeallocConstants(void);

private:
  std::unique_ptr<T_AclMemoryManager> _const_mgr;
  std::unique_ptr<T_AclMemoryManager> _nonconst_mgr;
  std::unique_ptr<IInternalBufferManager> _inter_mgr;
  // Maps each operand index to the manager (const or non-const) that owns it
  ir::OperandIndexMap<T_AclMemoryManager &> _ind_to_mgr;
};

} // namespace acl_common
} // namespace backend
} // namespace neurun

#include <cassert>
#include "util/logging.h"

namespace neurun
{
namespace backend
{
namespace acl_common
{

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::AclTensorManager(
    T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
    IInternalBufferManager *inter_mgr)
    : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
{
  // DO NOTHING
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateConsts(void)
{
  _const_mgr->allocate();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateNonconsts(void)
{
  _nonconst_mgr->allocate();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateConsts(void)
{
  _const_mgr->deallocate();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateNonconsts(void)
{
  _nonconst_mgr->deallocate();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateInternalBufferManager(void)
{
  _inter_mgr->allocate();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateInternalBufferManager(void)
{
  _inter_mgr->deallocate();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildTensor(
    const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
    size_t num_uses)
{
  // Each operand may be registered exactly once
  assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
  if (as_const)
  {
    _const_mgr->buildTensor(ind, info, rank, num_uses);
    _ind_to_mgr.insert({ind, *_const_mgr});
  }
  else
  {
    _nonconst_mgr->buildTensor(ind, info, rank, num_uses);
    _ind_to_mgr.insert({ind, *_nonconst_mgr});
  }
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensor(
    const ir::OperandIndex &parent, const ir::OperandIndex &child,
    const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
    size_t rank, bool extent_parent)
{
  assert(_ind_to_mgr.find(child) == _ind_to_mgr.end());
  // The parent must already be registered (as tensor or as sub-tensor)
  std::shared_ptr<T_ITensor> parent_tensor = findTensorAsParent(parent);
  assert(parent_tensor);
  _nonconst_mgr->buildSubtensor(parent_tensor, child, shape, coordinates, rank, extent_parent);
  _ind_to_mgr.insert({child, *_nonconst_mgr});
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::shared_ptr<T_ITensor>
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::findTensorAsParent(const ir::OperandIndex &ind)
{

  auto &tensors = _nonconst_mgr->tensors();
  auto &subtensors = _nonconst_mgr->subtensors();
  if (tensors.find(ind) != tensors.end())
  {
    // Parent is allocated as tensor
    return tensors[ind];
  }
  else if (subtensors.find(ind) != subtensors.end())
  {
    // Parent is allocated as subtensor
    return subtensors[ind];
  }
  else
  {
    return nullptr;
  }
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::startLifetime(const ir::OperandIndex &ind)
{
  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
  _ind_to_mgr.at(ind).startLifetime(ind);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::finishLifetime(const ir::OperandIndex &ind)
{
  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
  _ind_to_mgr.at(ind).finishLifetime(ind);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::shared_ptr<T_ITensor>
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
{
  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());

  // Check the owning manager's plain tensors first, then its sub-tensors
  auto &tensors = _ind_to_mgr.at(ind).tensors();
  if (tensors.find(ind) != tensors.end())
  {
    return tensors.at(ind);
  }
  else
  {
    return _ind_to_mgr.at(ind).subtensors().at(ind);
  }
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::constTensors(void)
{
  return _const_mgr->tensors();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::nonconstTensors(void)
{
  return _nonconst_mgr->tensors();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::nonconstSubtensors(void)
{
  return _nonconst_mgr->subtensors();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::shared_ptr<::arm_compute::IMemoryManager>
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::internal_buffer_manager(void)
{
  return _inter_mgr->internal_buffer_manager();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::iterate(
    const std::function<void(const ir::OperandIndex &)> &fn)
{
  for (auto it : _nonconst_mgr->tensors())
    fn(it.first);

  for (auto it : _nonconst_mgr->subtensors())
    fn(it.first);

  for (auto it : _const_mgr->tensors())
    fn(it.first);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::tryDeallocConstants(void)
{
  auto &tensors = _const_mgr->tensors();

  for (auto it = tensors.begin(); it != tensors.end();)
  {
    const auto &ind = it->first;
    auto tensor = it->second;
    // NOTE The condition "tensor->num_uses() < 2" is used to prevent deallocating a constant tensor
    // used in several nodes.
    if (tensor->handle() && !tensor->handle()->is_used() && tensor->num_uses() < 2)
    {
      VERBOSE(AclTensorManager) << "Tensor #" << ind.value()
                                << " will be deallocated as an unused constant tensor" << std::endl;
      tensor->allocator()->free();
      tensor.reset();
      it = tensors.erase(it);
    }
    else
    {
      ++it;
    }
  }
}

} // namespace acl_common
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
+ */ + +#include "AclTensorRegister.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_common +{ + +AclTensorRegister::AclTensorRegister(const ir::Operands &operands, + const std::shared_ptr<ITensorBuilder> &tensor_builder) + : _operands{operands}, _tensor_builder{tensor_builder} +{ + assert(tensor_builder != nullptr); +} + +void AclTensorRegister::visit(const ir::OpSequence &op_seq) +{ + for (const auto &e : op_seq.operations()) + { + const auto &node = *(e.node); + node.accept(*this); + // Set count of nodes to use operand + for (const auto &input : node.getInputs()) + { + setUsesCount(input, _operands.at(input).getUses().size()); + } + } +} + +} // namespace acl_common +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_common/AclTensorRegister.h b/runtime/neurun/backend/acl_common/AclTensorRegister.h new file mode 100644 index 000000000..905e19adb --- /dev/null +++ b/runtime/neurun/backend/acl_common/AclTensorRegister.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef __NEURUN_BACKEND_ACL_COMMON_TENSOR_REGISTER_H__
#define __NEURUN_BACKEND_ACL_COMMON_TENSOR_REGISTER_H__

#include <backend/ITensorRegister.h>

namespace neurun
{
namespace backend
{
namespace acl_common
{

// Common ITensorRegister base for the ACL backends.
// Walks an operation sequence and forwards each input operand's use count to
// the backend via setUsesCount(); concrete backends implement that hook.
class AclTensorRegister : public ITensorRegister
{
protected:
  // tensor_builder must not be null (asserted in the constructor)
  AclTensorRegister(const ir::Operands &operands,
                    const std::shared_ptr<ITensorBuilder> &tensor_builder);

public:
  virtual ~AclTensorRegister() = default;

protected:
  // Dispatches every node of op_seq to the visitor, then records use counts of its inputs
  void visit(const ir::OpSequence &op_seq);
  // Backend-specific hook: operand `ind` is used by `num_uses` nodes
  virtual void setUsesCount(const ir::OperandIndex &ind, size_t num_uses) const = 0;

protected:
  const ir::Operands &operands() const override { return _operands; }
  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
  // ACL backends support allocating operands as sub-tensors of a parent tensor
  bool supportSubTensor() const final { return true; }

private:
  const ir::Operands &_operands;
  const std::shared_ptr<ITensorBuilder> _tensor_builder;
};

} // namespace acl_common
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_ACL_COMMON_TENSOR_REGISTER_H__
diff --git a/runtime/neurun/backend/acl_common/CMakeLists.txt b/runtime/neurun/backend/acl_common/CMakeLists.txt new file mode 100644 index 000000000..32d01291f --- /dev/null +++ b/runtime/neurun/backend/acl_common/CMakeLists.txt @@ -0,0 +1,19 @@
# Unsupported architecture
nnas_find_package(ARMCompute QUIET)
if(NOT ARMCompute_FOUND)
  return()
endif(NOT ARMCompute_FOUND)

file(GLOB SOURCES "*.cc")

add_library(${LIB_NEURUN_BACKEND_ACL_COMMON} STATIC ${SOURCES})

target_include_directories(${LIB_NEURUN_BACKEND_ACL_COMMON} PUBLIC ${NEURUN_INCLUDE_DIR})
target_include_directories(${LIB_NEURUN_BACKEND_ACL_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${LIB_NEURUN_BACKEND_ACL_COMMON} PUBLIC arm_compute arm_compute_ex)
target_link_libraries(${LIB_NEURUN_BACKEND_ACL_COMMON} PUBLIC nnfw_lib_misc nnfw_lib_cpp14)
target_link_libraries(${LIB_NEURUN_BACKEND_ACL_COMMON} PRIVATE
nnfw_common)
target_link_libraries(${LIB_NEURUN_BACKEND_ACL_COMMON} PRIVATE nnfw_coverage)

# Build as PIC so this static archive can be linked into shared backend libraries
set_target_properties(${LIB_NEURUN_BACKEND_ACL_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${LIB_NEURUN_BACKEND_ACL_COMMON} PROPERTIES OUTPUT_NAME backend_acl_common)
diff --git a/runtime/neurun/backend/acl_common/Convert.cc b/runtime/neurun/backend/acl_common/Convert.cc new file mode 100644 index 000000000..fc5be72a2 --- /dev/null +++ b/runtime/neurun/backend/acl_common/Convert.cc @@ -0,0 +1,193 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#include "Convert.h" + +#include "Swizzle.h" +#include "ir/DataType.h" +#include <cpp14/memory.h> + +namespace +{ + +::arm_compute::DataLayout asDataLayout(neurun::ir::Layout layout) +{ + switch (layout) + { + case neurun::ir::Layout::NHWC: + return ::arm_compute::DataLayout::NHWC; + case neurun::ir::Layout::NCHW: + return ::arm_compute::DataLayout::NCHW; + default: + return ::arm_compute::DataLayout::UNKNOWN; + } +} + +} // namespace + +namespace neurun +{ +namespace backend +{ +namespace acl_common +{ + +::arm_compute::TensorShape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout, + ir::Layout backend_layout, bool apply_dim_correction) +{ + const uint32_t rank = shape.rank(); + + ::arm_compute::TensorShape res{}; + + res.set_num_dimensions(rank); + + for (uint32_t axis = 0; axis < rank; ++axis) + { + // NOTE In some cases, in incorrect dimensions is required. + // For example, intput_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of + // LSTM is used as the weight of the FullyConnected. + // The FullyConnected's weight must be greater or equal than 2-dimensions. + // However, if the dimension correction is applied to input_to_input_weights with input_size + // equal to 1, it will be changed to 1-D. + // So input_to_input_weights is not used by the weight of FullyConnected. 
+ res.set(ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(), shape.dim(axis), + apply_dim_correction); + } + + return res; +} + +::arm_compute::Coordinates asTensorCoordinate(const ::neurun::util::Coordinates &coord, + ir::Layout frontend_layout, ir::Layout backend_layout) +{ + const uint32_t rank = coord.size(); + + ::arm_compute::Coordinates res{}; + + res.set_num_dimensions(rank); + + for (uint32_t axis = 0; axis < rank; ++axis) + { + res.set(ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(), coord[axis]); + } + + return res; +} + +::arm_compute::DataType asDataType(const ir::DataType type) +{ + switch (type) + { + case ir::DataType::FLOAT32: + return ::arm_compute::DataType::F32; + case ir::DataType::INT32: + return ::arm_compute::DataType::S32; + case ir::DataType::UINT32: + return ::arm_compute::DataType::U32; + case ir::DataType::QUANT8_ASYMM: + return ::arm_compute::DataType::QASYMM8; + case ir::DataType::BOOL8: + case ir::DataType::UINT8: + return ::arm_compute::DataType::U8; + case ir::DataType::QUANT8_SYMM: + return ::arm_compute::DataType::S8; + default: + throw std::runtime_error("Not supported, yet"); + break; + } +} + +::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset) +{ + return ::arm_compute::QuantizationInfo(scale, offset); +} + +::arm_compute::TensorInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo, + ir::Layout frontend_layout, ir::Layout backend_layout, + bool apply_dim_correction) +{ + ::arm_compute::TensorInfo info( + asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1, + asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.offset())); + info.set_data_layout(asDataLayout(backend_layout)); + return info; +} + +::arm_compute::PadStrideInfo asPadStrideInfo(const ir::ExplicitPadding &padding, + const ir::Stride &stride) +{ + return ::arm_compute::PadStrideInfo{stride.horizontal, + stride.vertical, 
+ padding.left, + padding.right, + padding.top, + padding.bottom, + ::arm_compute::DimensionRoundingType::FLOOR}; +} + +::arm_compute::ActivationLayerInfo asActivationLayerInfo(const ir::Activation act_code) +{ + switch (act_code) + { + case ir::Activation::NONE: + return ::arm_compute::ActivationLayerInfo{}; + case ir::Activation::RELU: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + case ir::Activation::RELU1: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + case ir::Activation::RELU6: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; + // Cases for activation of LSTM. + case ir::Activation::TANH: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; + case ir::Activation::SIGMOID: + // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0. + // TODO In ACL and nnapi sepc, currently, Logistic's L always is 1, k always is 1, x0 always + // 0(always sigmoid) regardless of values of the parameter. + // If ACL support non-sigmoid logistic, should fix param values. 
+ return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f}; + default: + throw std::runtime_error{"Not supported, yet"}; + break; + } +} + +std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer) +{ + return nnfw::cpp14::make_unique<AclFunction>(std::move(layer)); +} + +ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout) +{ + switch (data_layout) + { + case ::arm_compute::DataLayout::NHWC: + return ir::Layout::NHWC; + case ::arm_compute::DataLayout::NCHW: + return ir::Layout::NCHW; + default: + return ir::Layout::UNKNOWN; + } +} + +} // namespace acl_common +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_common/Convert.h b/runtime/neurun/backend/acl_common/Convert.h new file mode 100644 index 000000000..4c638157b --- /dev/null +++ b/runtime/neurun/backend/acl_common/Convert.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef __NEURUN_BACKEND_ACL_COMMON_CONVERT_H__
#define __NEURUN_BACKEND_ACL_COMMON_CONVERT_H__

#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/SubTensorInfo.h>
#include <arm_compute/core/TensorShape.h>

#include "ir/Layout.h"
#include "ir/InternalType.h"
#include "ir/Operand.h"
#include "ir/Shape.h"
#include "ir/TypeInfo.h"
#include "misc/feature/Shape.h"
#include "misc/kernel/Shape.h"

#include "util/Padding.h"
#include "util/Coordinates.h"

#include "AclFunction.h"

namespace neurun
{
namespace backend
{
namespace acl_common
{

// Conversion helpers between neurun IR types and their arm_compute counterparts.

// Convert an IR shape to an ACL TensorShape, swizzling axes from frontend_layout
// to backend_layout; apply_dim_correction is forwarded to TensorShape::set
::arm_compute::TensorShape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout,
                                         ir::Layout backend_layout,
                                         bool apply_dim_correction = true);
// Convert runtime coordinates to ACL coordinates with the same axis swizzle
::arm_compute::Coordinates asTensorCoordinate(const ::neurun::util::Coordinates &coord,
                                              ir::Layout frontend_layout,
                                              ir::Layout backend_layout);
// Map an IR data type to the ACL data type (throws for unsupported types)
::arm_compute::DataType asDataType(ir::DataType type);
// Build a complete ACL TensorInfo (shape, type, quantization, layout)
::arm_compute::TensorInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo,
                                       ir::Layout frontend_layout, ir::Layout backend_layout,
                                       bool apply_dim_correction = true);

// Build ACL padding/stride info with FLOOR dimension rounding
::arm_compute::PadStrideInfo asPadStrideInfo(const ir::ExplicitPadding &padding,
                                             const ir::Stride &stride);

// Map an IR fused-activation code to an ACL ActivationLayerInfo (throws for unsupported)
::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);

// Wrap a raw ACL IFunction into the runtime's AclFunction adapter
std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);

// Map an ACL data layout back to the runtime's layout enum
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);

} // namespace acl_common
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_ACL_COMMON_CONVERT_H__
diff --git a/runtime/neurun/backend/acl_common/IACLTensor.cc b/runtime/neurun/backend/acl_common/IACLTensor.cc new file mode 100644 index 000000000..70ffbdcf0 --- /dev/null +++ b/runtime/neurun/backend/acl_common/IACLTensor.cc @@ -0,0 +1,63 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IACLTensor.h" +#include "Convert.h" +#include "Swizzle.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_common +{ + +size_t IACLTensor::num_dimensions() const +{ + throw std::runtime_error("No definition of num_dimensions()"); + return 0; +} + +size_t IACLTensor::dimension(size_t index) const +{ + // Assume that the front is higher dimensional. + // i.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout + // NOTE This tensor must not be applied dim correction + assert(num_dimensions() > index); + const ARMComputeAxis reversed{(static_cast<uint32_t>(num_dimensions() - index) - 1)}; + return info()->dimension(reversed.value()); +} + +size_t IACLTensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + const auto rank = num_dimensions(); + assert(rank == coords.size()); + + ::arm_compute::Coordinates acl_coords; + for (uint32_t i = 0; i < rank; ++i) + { + const ARMComputeAxis reversed{static_cast<uint32_t>((rank - i) - 1)}; + acl_coords.set(reversed.value(), coords[i]); + } + + return info()->offset_element_in_bytes(acl_coords); +} + +ir::Layout IACLTensor::layout() const { return acl_common::asRuntimeLayout(info()->data_layout()); } + +} // namespace acl_common +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_common/IACLTensor.h b/runtime/neurun/backend/acl_common/IACLTensor.h new file mode 100644 index 
000000000..b0dcbb409 --- /dev/null +++ b/runtime/neurun/backend/acl_common/IACLTensor.h @@ -0,0 +1,62 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__
#define __NEURUN_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__

#include <backend/operand/ITensor.h>
#include <arm_compute/core/ITensor.h>

namespace neurun
{
namespace backend
{
namespace acl_common
{

// Common adapter between the runtime's ITensor interface and an underlying
// arm_compute::ITensor handle. Concrete classes provide handle(); everything
// else is derived from the handle's ITensorInfo. Move-only (copy is deleted).
class IACLTensor : public operand::ITensor
{
public:
  IACLTensor() = default;
  IACLTensor(const IACLTensor &) = delete;
  IACLTensor &operator=(const IACLTensor &) = delete;
  IACLTensor(IACLTensor &&) = default;
  IACLTensor &operator=(IACLTensor &&) = default;

public:
  uint8_t *buffer() const final { return handle()->buffer(); }
  size_t total_size() const final { return info()->total_size(); }
  // Dimension in runtime axis order (highest dimension first) — see IACLTensor.cc
  size_t dimension(size_t index) const final;
  size_t num_dimensions() const override;
  size_t calcOffset(const neurun::util::Coordinates &coords) const final;
  ir::Layout layout() const final;
  bool has_padding() const override { return info()->has_padding(); }

public:
  // Accessor to the wrapped ACL tensor; must be provided by the concrete class
  virtual const arm_compute::ITensor *handle() const = 0;
  virtual arm_compute::ITensor *handle() = 0;

  const arm_compute::ITensorInfo *info() const { return handle()->info(); }
  arm_compute::ITensorInfo *info() { return handle()->info(); }

  arm_compute::DataType data_type() const { return info()->data_type(); }
};

} // namespace acl_common
} // namespace backend
} // namespace neurun

#endif //__NEURUN_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__
diff --git a/runtime/neurun/backend/acl_common/Swizzle.h b/runtime/neurun/backend/acl_common/Swizzle.h new file mode 100644 index 000000000..11874b592 --- /dev/null +++ b/runtime/neurun/backend/acl_common/Swizzle.h @@ -0,0 +1,160 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#ifndef __NEURUN_BACKEND_ACL_COMMON_SWIZZLE_H__ +#define __NEURUN_BACKEND_ACL_COMMON_SWIZZLE_H__ + +#include <cassert> +#include <ir/Layout.h> + +namespace neurun +{ +namespace backend +{ +namespace acl_common +{ + +class ARMComputeAxis +{ +public: + ARMComputeAxis() = default; + +public: + explicit ARMComputeAxis(uint32_t value) : _value{value} + { + // DO NOTHING + } + +public: + uint32_t value(void) const { return _value; } + +private: + uint32_t _value; +}; + +// Convert axis in acl order +inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis, + const ir::Layout org_layout = ir::Layout::UNKNOWN, + const ir::Layout acl_layout = ir::Layout::UNKNOWN) +{ + assert(rank > axis); + + const ARMComputeAxis reversed{(rank - axis) - 1}; + + if (rank >= 4 && org_layout == ir::Layout::NHWC && acl_layout == ir::Layout::NCHW) + { + // NHWC -> WHCN + // DEPTH + if (0 == reversed.value()) + { + return ARMComputeAxis{2}; + } + // WIDTH + if (1 == reversed.value()) + { + return ARMComputeAxis{0}; + } + // HEIGHT + if (2 == reversed.value()) + { + return ARMComputeAxis{1}; + } + } + if (rank >= 4 && org_layout == ir::Layout::NCHW && acl_layout == ir::Layout::NHWC) + { + // NCHW -> CWHN + // WIDTH + if (0 == reversed.value()) + { + return ARMComputeAxis{1}; + } + // HEIGHT + if (1 == reversed.value()) + { + return ARMComputeAxis{2}; + } + // DEPTH + if (2 == reversed.value()) + { + return ARMComputeAxis{0}; + } + } + + return reversed; +} + +inline ::arm_compute::Coordinates +getARMComputeAxises(uint32_t rank, const ir::Layout org_layout = ir::Layout::UNKNOWN, + const ir::Layout acl_layout = ir::Layout::UNKNOWN) +{ + ::arm_compute::Coordinates res{}; + + res.set_num_dimensions(rank); + + for (uint32_t axis = 0; axis < rank; ++axis) + { + res.set(axis, ToARMComputeAxis(rank, axis, org_layout, acl_layout).value()); + } + + return res; +} + +// Restructure runtime_permutationVector to ACL_permutationVector +inline ::arm_compute::PermutationVector 
+getARMComputePermutationVector(uint32_t rank, const std::vector<int32_t> runtime_pv, + const ir::Layout org_layout = ir::Layout::UNKNOWN, + const ir::Layout acl_layout = ir::Layout::UNKNOWN) +{ + // rank upto 4 is supported + assert(rank <= 4); + assert(runtime_pv.size() > 0); + + int new_pv[4] = {0}; + ::arm_compute::Coordinates axises = getARMComputeAxises(rank, org_layout, acl_layout); + + for (uint32_t i = 0; i < rank; ++i) + { + new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i], org_layout, acl_layout).value(); + } + + ::arm_compute::PermutationVector ACL_PV = + ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]}; + ACL_PV.set_num_dimensions(rank); + + return ACL_PV; +} + +template <typename T> +inline T ReorderBits(T in, size_t numOfBits, const ir::Layout org_layout = ir::Layout::UNKNOWN, + const ir::Layout acl_layout = ir::Layout::UNKNOWN) +{ + assert(numOfBits > 0); + T out = 0; + for (int32_t i = numOfBits - 1; i >= 0; --i) + { + const uint32_t toShift = + numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1; + out += ((in & 1) << toShift); + in >>= 1; + } + return out; +} + +} // namespace acl_common +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_COMMON_SWIZZLE_H__ diff --git a/runtime/neurun/backend/acl_common/TemplTensorBuilder.h b/runtime/neurun/backend/acl_common/TemplTensorBuilder.h new file mode 100644 index 000000000..bb43823ed --- /dev/null +++ b/runtime/neurun/backend/acl_common/TemplTensorBuilder.h @@ -0,0 +1,612 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
#define __NEURUN_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__

#include <memory>
#include <queue>

#include <arm_compute/core/Types.h>
#include <backend/ITensorBuilder.h>
#include "ir/OperandIndexMap.h"
#include "AclTensorManager.h"
#include "cpp14/memory.h"
#include <util/Utils.h>

namespace neurun
{
namespace backend
{
namespace acl_common
{

// Marks whether a queued lifetime event is an operand's first or last use
enum class UsesType
{
  FIRST,
  LAST
};

// Generic tensor builder shared by the ACL backends; parameterized over the
// backend's tensor, sub-tensor, and tensor-manager types.
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
class TemplTensorBuilder : public ITensorBuilder
{
public:
  using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;

  TemplTensorBuilder(T_AclTensorManager *tensor_mgr);

  /**
   * @brief Register tensor information to allocate on ACL-CL backend
   * @param[in] ind Operand index
   * @param[in] info Tensor information
   * @param[in] layout Tensor data layout
   */
  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
                          ir::Layout backend_layout, bool as_const) override;
  /**
   * @brief Register subtensor information to allocate on ACL-CL backend
   * @param[in] ind Operand index
   * @param[in] info Tensor information
   */
  void registerSubTensorInfo(const ir::OperandIndex &ind,
                             const compiler::SubTensorInfo &info) override;

  void notifyFirstUse(const ir::OperandIndex &) override;
  void notifyLastUse(const ir::OperandIndex &) override;

  bool isRegistered(const ir::OperandIndex &) const override;

  void prepare(void) override;
  void allocateConsts() override;
  void allocateNonconsts() override;
  void postFunctionPrepare() override;
  void finalize() override;

  std::shared_ptr<::neurun::backend::operand::ITensor>
  tensorAt(const ir::OperandIndex &ind) override;
  void iterate(const IterateFunction &fn) override;

  // Lifetime hooks driven by the linear executor before/after each node
  void preVisit(const ir::Operation &node) override;
  void postVisit(const ir::Operation &node) override;

  std::unique_ptr<ITensorManager> releaseTensorManager(void) override;

  std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
  /**
   * @brief Check child tensor is allocated as subtensor of parent tensor
   * @param[in] parent Index of parent
   * @param[in] child Index of child
   * @return @c true if child is allocated as subtensor of parent, otherwise @c false
   */
  bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);

  // Enable/disable dimension correction for the given operand (see Convert.h)
  void dimCorrection(const ir::OperandIndex &index, bool apply_dim_correction);

  T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }

  // Records how many nodes use the operand; re-registration must not change the count
  void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
  {
    assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
                                                                : true);
    _uses_count_map[index] = num_uses;
  }

private:
  void buildTensors(void);
  void buildSubtensors(void);
  void validate(void);
  ir::OperandIndex findRootParent(ir::OperandIndex index);

private:
  ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
  ir::OperandIndexMap<compiler::SubTensorInfo> _subtensor_info_map;
  ir::OperandIndexMap<bool> _apply_dim_correction_map;
  ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
  ir::OperandIndexMap<size_t> _uses_count_map;

  std::unique_ptr<T_AclTensorManager> _tensor_mgr;
  ir::OperandIndexSequence _constants;

  // TODO Consider dividing TensorBuilder into Linear and others
  const std::string _executor_str;

  // for linear executor
  std::queue<std::pair<UsesType, ir::OperandIndex>> _uses_queue;
  uint32_t _first_uses_num;
  ir::OperandIndexMap<bool> _first_uses_visit;

  // for subtensors
  ir::OperandIndexMap<uint32_t> _parent_def;
  ir::OperandIndexMap<uint32_t> _parent_uses;
};

} // namespace acl_common
} // namespace backend
} // namespace neurun

#include <cassert>
#include <stack>

#include "Convert.h"

#include "util/logging.h"

namespace neurun
{
namespace backend
{
namespace acl_common
{

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::TemplTensorBuilder(
    T_AclTensorManager *tensor_mgr)
    : _tensor_mgr{tensor_mgr}, _executor_str(util::getConfigString(util::config::EXECUTOR)),
      _first_uses_num(0)
{
  assert(_tensor_mgr);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
    const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout,
    bool as_const)
{
  // Registration is only legal before any tensor has been built
  assert(_tensor_mgr->constTensors().size() == 0);
  assert(_tensor_mgr->nonconstTensors().size() == 0);

  _tensor_info_map.emplace(ind, info);
  _apply_dim_correction_map.emplace(ind, true);
  _tensor_layout_map.insert({ind, backend_layout});
  if (as_const)
    _constants.append(ind);

  assert(_first_uses_visit.find(ind) == _first_uses_visit.end());
  _first_uses_visit[ind] = false;
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerSubTensorInfo(
    const ir::OperandIndex &ind, const compiler::SubTensorInfo &info)
{
  // Registration is only legal before any tensor has been built
  assert(_tensor_mgr->constTensors().size() == 0);
  assert(_tensor_mgr->nonconstTensors().size() == 0);

  _subtensor_info_map.emplace(ind, info);
  _apply_dim_correction_map.emplace(ind, true);

  assert(_first_uses_visit.find(ind) == _first_uses_visit.end());
  _first_uses_visit[ind] = false;

  const auto &parent_ind = info.parent();

  // parent_def: the parent still needs its lifetime started
  _parent_def[parent_ind] = 1;

  // parent_use: count of this parent's dependents (children + itself)
  if (_parent_uses.find(parent_ind) == _parent_uses.end())
    _parent_uses[parent_ind] = 1; // 1 means including parent it-self
  _parent_uses[parent_ind]++;
}

// Queues a "first use" lifetime event for the operand (consumed in preVisit)
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyFirstUse(
    const ir::OperandIndex &ind)
{
  _first_uses_num++;
  _uses_queue.emplace(UsesType::FIRST, ind);
}

// Queues a "last use" lifetime event for the operand (consumed in postVisit)
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyLastUse(
    const ir::OperandIndex &ind)
{
  _uses_queue.emplace(UsesType::LAST, ind);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isRegistered(
    const ir::OperandIndex &ind) const
{
  return _tensor_info_map.find(ind) != _tensor_info_map.end() ||
         _subtensor_info_map.find(ind) != _subtensor_info_map.end();
}

// Materializes all registered tensors; parents before children
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void)
{
  buildTensors();
  buildSubtensors();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocateConsts(void)
{
  assert(_constants.size() == _tensor_mgr->constTensors().size());
  _tensor_mgr->allocateConsts();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocateNonconsts(void)
{
  assert(_tensor_info_map.size() == _tensor_mgr->nonconstTensors().size() + _constants.size());
  _tensor_mgr->allocateNonconsts();
}

// After kernel preparation, constants that no kernel still uses can be freed
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(void)
{
  _tensor_mgr->tryDeallocConstants();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::finalize(void)
{
  validate();
  _tensor_mgr->allocateInternalBufferManager();
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::shared_ptr<::neurun::backend::operand::ITensor>
TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind)
{
  return _tensor_mgr->at(ind);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn)
{
  _tensor_mgr->iterate(fn);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::shared_ptr<T_ITensor>
TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
{
  return _tensor_mgr->at(ind);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isSubTensorOf(
    const ir::OperandIndex &parent, const ir::OperandIndex &child)
{
  // Child must be registered as a subtensor at all
  if (_subtensor_info_map.find(child) == _subtensor_info_map.end())
  {
    return false;
  }

  // ... and must actually have been materialized as a subtensor
  auto &subtensors = _tensor_mgr->nonconstSubtensors();
  if (subtensors.find(child) == subtensors.end())
  {
    return false;
  }

  // ... of this particular parent
  if (_subtensor_info_map.at(child).parent() != parent)
  {
    return false;
  }

  return true;
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::dimCorrection(
    const ir::OperandIndex &index, bool apply_dim_correction)
{
  _apply_dim_correction_map[index] = apply_dim_correction;
}

// Transfers ownership of the tensor manager to the caller
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::unique_ptr<ITensorManager>
TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseTensorManager(void)
{
  return std::move(_tensor_mgr);
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
{
  assert(_tensor_mgr->constTensors().size() == 0);
  assert(_tensor_mgr->nonconstTensors().size() == 0);

  for (auto &entry : _tensor_info_map)
  {
    auto ind = entry.first;
    const auto &info = entry.second;
    // NOTE SubTensor's layout must be the same with layout of parent tensor
    const auto &root_parent = findRootParent(ind);
    const auto &backend_layout = _tensor_layout_map[root_parent];
    auto tensor_info = asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN,
                                    backend_layout, _apply_dim_correction_map[ind]);
    _tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), _constants.contains(ind),
                             _uses_count_map[ind]);
  }
}

template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensors(void)
{
  // TODO Handle SubTensor(subsumption)
  // Currently this TemplTensorBuilder does not have subsumption info yet
  // Allocated subtensor will be mapped to _subtensors instead of _tensors
  assert(_tensor_mgr->nonconstSubtensors().size() == 0);

  // To make subtensor, parent tensor must be made first
  // For this condition, use stack
  //  1) Push one subtensor index to stack (iterate subtensors)
  //  2) If tensor at stack top is already made, pop and go to 4)
  //  3) If tensor pushed at 1) is not made, check parent tensor
  //    3-1) If parent tensor is already made, we can make child tensor
  //         Make child tensor and pop, go to 4)
  //    3-2) If parent tensor is not made, we can't make child tensor yet
  //         Push parent tensor index to stack and return to 4)
  //  4) If stack is empty, return to 1), else return to 2)
  auto &subtensors = _tensor_mgr->nonconstSubtensors();
  for (auto &entry : _subtensor_info_map)
  {
    ir::OperandIndex ind = entry.first;

    std::stack<ir::OperandIndex> stack;
    stack.push(ind);

    while (!stack.empty())
    {
      const auto current = stack.top();
      const auto &info = _subtensor_info_map.at(current);

      // Already generated SubTensor
      if (subtensors.find(current) != subtensors.end())
      {
        stack.pop();
        continue;
      }

      auto parent = info.parent();
      std::shared_ptr<T_ITensor> parent_tensor = _tensor_mgr->findTensorAsParent(parent);
      if (!parent_tensor)
      {
        // Cannot find allocated parent tensor: allocate parent first
        assert(_subtensor_info_map.find(parent) != _subtensor_info_map.end());
        stack.push(parent);
        continue;
      }
      assert(parent_tensor != nullptr);

      // Child's type should be same with parent
      assert(info.type().offset() == parent_tensor->info()->quantization_info().offset);
      assert(info.type().scale() == parent_tensor->info()->quantization_info().scale);
      assert(asDataType(info.type().type()) == parent_tensor->info()->data_type());

      // NOTE SubTensor's layout must be the same with layout of parent tensor
      const auto &root_parent = findRootParent(parent);
      const auto &backend_layout = _tensor_layout_map[root_parent];

      auto shape = asTensorShape(info.shape(), ir::Layout::UNKNOWN, backend_layout,
                                 _apply_dim_correction_map[current]);
::arm_compute::Coordinates coordinates = + asTensorCoordinate(info.offset(), ir::Layout::UNKNOWN, backend_layout); + _tensor_mgr->buildSubtensor(parent, current, shape, coordinates, info.shape().rank(), true); + stack.pop(); + } + } +} + +template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> +void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::preVisit(const ir::Operation &node) +{ + // For now others executor doesn't need this step + if (_executor_str != "Linear") + { + return; + } + + std::function<void(const ir::OperandIndex &ind)> def_handler = + [this, &def_handler](const ir::OperandIndex &ind) { + bool is_subtensor = _subtensor_info_map.find(ind) != _subtensor_info_map.end(); + bool is_parent = _parent_def.find(ind) != _parent_def.end(); + if (!is_subtensor && !is_parent) + { + _tensor_mgr->startLifetime(ind); + return; + } + + if (is_parent) + { + if (_parent_def[ind] == 0) + return; + + _parent_def[ind] = 0; + + if (is_subtensor) + { + const auto &it = _parent_def.find(ind); + _parent_def.erase(it); + def_handler(ind); + } + else + { + _tensor_mgr->startLifetime(ind); + } + } + else if (is_subtensor) + { + const ir::OperandIndex &parent_ind = _subtensor_info_map.at(ind).parent(); + if (_parent_def[parent_ind] == 0) + return; + def_handler(parent_ind); + } + }; + + // See #5642 + ir::OperandIndexMap<bool> outputs_map; + for (const auto &ind : node.getOutputs()) + { + assert(_first_uses_visit.find(ind) != _first_uses_visit.end()); + outputs_map[ind] = _first_uses_visit[ind]; + } + + // outputs_map's all elements are true? 
+ auto outputs_map_all_check = [&outputs_map]() { + return std::all_of(outputs_map.begin(), outputs_map.end(), + [](std::pair<const ir::OperandIndex, bool> it) { return it.second; }); + }; + + std::pair<UsesType, ir::OperandIndex> peak; + while (!outputs_map_all_check() && (peak = _uses_queue.front()).first == UsesType::FIRST) + { + _uses_queue.pop(); + _first_uses_num--; + + const auto &popped_idx = peak.second; + def_handler(popped_idx); + + outputs_map[popped_idx] = true; + _first_uses_visit[popped_idx] = true; + } +} + +template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> +void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postVisit(const ir::Operation &node) +{ + // For now others executor doesn't need this step + if (_executor_str != "Linear") + { + return; + } + + std::function<void(const ir::OperandIndex &ind)> use_handler = + [this, &use_handler](const ir::OperandIndex &ind) { + bool is_subtensor = _subtensor_info_map.find(ind) != _subtensor_info_map.end(); + bool is_parent = _parent_uses.find(ind) != _parent_uses.end(); + if (!is_subtensor && !is_parent) + { + _tensor_mgr->finishLifetime(ind); + return; + } + + // This handler shall be executed by the linear executor so that + // The parent operand will always be done after the subtensor + if (is_parent) + { + --_parent_uses[ind]; + assert(_parent_uses[ind] == 0); + + if (is_subtensor) + { + const auto &it = _parent_uses.find(ind); + _parent_uses.erase(it); + use_handler(ind); + } + else + { + _tensor_mgr->finishLifetime(ind); + } + } + else if (is_subtensor) + { + const ir::OperandIndex &parent_ind = _subtensor_info_map.at(ind).parent(); + --_parent_uses[parent_ind]; + assert(_parent_uses[parent_ind] > 0); + } + }; + + // See #5642 + const auto &inputs = node.getInputs(); + std::pair<UsesType, ir::OperandIndex> peak; + while ((peak = _uses_queue.front()).first == UsesType::LAST) + { + const auto &popped_idx = peak.second; + if (inputs.contains(popped_idx)) + { + 
_uses_queue.pop(); + use_handler(popped_idx); + } + else + { + break; + } + } + + if (_first_uses_num == 0) + { + while (!_uses_queue.empty()) + { + peak = _uses_queue.front(); + assert(peak.first == UsesType::LAST); + + _uses_queue.pop(); + + use_handler(peak.second); + } + } +} + +template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> +void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::validate(void) +{ + // For now others executor doesn't need this step + if (_executor_str != "Linear") + { + return; + } + + for (auto it : _tensor_info_map) + { + assert(_first_uses_visit.find(it.first) != _first_uses_visit.end()); + assert(_first_uses_visit[it.first]); + } + + for (auto it : _subtensor_info_map) + { + assert(_first_uses_visit.find(it.first) != _first_uses_visit.end()); + assert(_first_uses_visit[it.first]); + } + + for (auto it : _tensor_layout_map) + { + assert(_first_uses_visit.find(it.first) != _first_uses_visit.end()); + assert(_first_uses_visit[it.first]); + UNUSED_RELEASE(it); + } + + assert(_uses_queue.size() == 0); + assert(_first_uses_num == 0); + + assert( + std::all_of(_parent_def.begin(), _parent_def.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(_parent_uses.begin(), _parent_uses.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); +} + +template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> +ir::OperandIndex +TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::findRootParent(ir::OperandIndex ind) +{ + if (_subtensor_info_map.find(ind) == _subtensor_info_map.end()) + return ind; + + const auto &parent_ind = _subtensor_info_map.at(ind).parent(); + return findRootParent(parent_ind); +} + +} // namespace acl_common +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__ diff --git a/runtime/neurun/backend/acl_neon/Backend.h 
b/runtime/neurun/backend/acl_neon/Backend.h new file mode 100644 index 000000000..2fcf66933 --- /dev/null +++ b/runtime/neurun/backend/acl_neon/Backend.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_NEON_BACKEND_H__ +#define __NEURUN_BACKEND_ACL_NEON_BACKEND_H__ + +#include <memory> +#include <backend/Backend.h> +#include <ir/Operands.h> + +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ShapeFixer.h" +#include "TensorManager.h" +#include "TensorRegister.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +class Backend : public ::neurun::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<BackendContext> + newContext(const ir::Operands &operands, + const std::shared_ptr<custom::IKernelBuilder> &) const override + { + auto tensor_builder = std::make_shared<TensorBuilder>(createTensorManager()); + return std::unique_ptr<BackendContext>{new BackendContext{ + this, tensor_builder, std::make_shared<ConstantInitializer>(operands, tensor_builder), + std::make_shared<KernelGenerator>(operands, tensor_builder), + std::make_shared<ShapeFixer>(operands, tensor_builder), + std::make_shared<TensorRegister>(operands, tensor_builder)}}; + } + +private: 
+ std::shared_ptr<IConfig> _config; +}; + +} // namespace acl_neon +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_NEON_BACKEND_H__ diff --git a/runtime/neurun/backend/acl_neon/CMakeLists.txt b/runtime/neurun/backend/acl_neon/CMakeLists.txt new file mode 100644 index 000000000..061246d36 --- /dev/null +++ b/runtime/neurun/backend/acl_neon/CMakeLists.txt @@ -0,0 +1,21 @@ +# Unsupported architecture +nnas_find_package(ARMCompute QUIET) +if(NOT ARMCompute_FOUND) + return() +endif(NOT ARMCompute_FOUND) + +set(LIB_NEURUN_BACKEND_ACL_NEON neurun_backend_acl_neon) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_NEURUN_BACKEND_ACL_NEON} SHARED ${SOURCES}) + +target_include_directories(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE neurun_core) +target_link_libraries(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE ${LIB_NEURUN_BACKEND_ACL_COMMON}) +target_link_libraries(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE nnfw_common) +target_link_libraries(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE nnfw_coverage) + +set_target_properties(${LIB_NEURUN_BACKEND_ACL_NEON} PROPERTIES OUTPUT_NAME backend_acl_neon) + +install(TARGETS ${LIB_NEURUN_BACKEND_ACL_NEON} DESTINATION lib) diff --git a/runtime/neurun/backend/acl_neon/Config.cc b/runtime/neurun/backend/acl_neon/Config.cc new file mode 100644 index 000000000..352bc0b41 --- /dev/null +++ b/runtime/neurun/backend/acl_neon/Config.cc @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Config.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +bool Config::initialize() { return true; } + +} // namespace acl_neon +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_neon/Config.h b/runtime/neurun/backend/acl_neon/Config.h new file mode 100644 index 000000000..430c194ee --- /dev/null +++ b/runtime/neurun/backend/acl_neon/Config.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef __NEURUN_BACKEND_ACL_NEON_CONFIG_H__
#define __NEURUN_BACKEND_ACL_NEON_CONFIG_H__

#include <backend/IConfig.h>
#include <cpp14/memory.h>
#include <util/ITimer.h>

namespace neurun
{
namespace backend
{
namespace acl_neon
{

// Backend configuration/capability descriptor for the ACL NEON backend.
class Config : public IConfig
{
public:
  // Identifier string of this backend
  std::string id() override { return "acl_neon"; }
  bool initialize() override;
  // This backend handles layout permutation itself
  bool SupportPermutation() override { return true; }
  // This backend supports subtensor (parent/child) allocation
  bool SupportSubTensorAlloc() override { return true; }

  // Timer used to profile this backend's kernels (CPU wall-clock timer,
  // since NEON kernels run on the CPU)
  std::unique_ptr<util::ITimer> timer() override
  {
    return nnfw::cpp14::make_unique<util::CPUTimer>();
  }
};

} // namespace acl_neon
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_ACL_NEON_CONFIG_H__

diff --git a/runtime/neurun/backend/acl_neon/ConstantInitializer.cc b/runtime/neurun/backend/acl_neon/ConstantInitializer.cc
new file mode 100644
index 000000000..9a74bda29
--- /dev/null
+++ b/runtime/neurun/backend/acl_neon/ConstantInitializer.cc
@@ -0,0 +1,246 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#include "ConstantInitializer.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +ConstantInitializer::ConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _operands{operands}, _tensor_builder{tensor_builder} +{ + // DO NOTHING +} + +void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); + const auto &block_size_obj = _operands.at(block_size_index); + + if (block_size_obj.isConstant()) + { + _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) { + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](::neurun::backend::operand::ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const int32_t value = base[shape.num_elements() - i - 1]; + int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + + tensor.calcOffset({static_cast<int32_t>(i)})); + *into = value; + } + }); + }; + } +} + +void ConstantInitializer::visit(const ir::operation::Conv2D &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + registerPermuteInitializer(kernel_index, kernel_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + registerPermuteInitializer(kernel_index, kernel_obj); + + const auto &bias_index = 
node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::FullyConnected &node) +{ + const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto &weight_obj = _operands.at(weight_index); + registerCopyInitializer(weight_index, weight_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::LSTM &node) +{ + const auto &input_to_input_weights_index = + node.getInputs().at(ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); + const auto &input_to_input_weights_obj = _operands.at(input_to_input_weights_index); + registerCopyInitializer(input_to_input_weights_index, input_to_input_weights_obj); + + const auto &input_to_forget_weights_index = + node.getInputs().at(ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); + const auto &input_to_forget_weights_obj = _operands.at(input_to_forget_weights_index); + registerCopyInitializer(input_to_forget_weights_index, input_to_forget_weights_obj); + + const auto &input_to_cell_weights_index = + node.getInputs().at(ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); + const auto &input_to_cell_weights_obj = _operands.at(input_to_cell_weights_index); + registerCopyInitializer(input_to_cell_weights_index, input_to_cell_weights_obj); + + const auto &input_to_output_weights_index = + node.getInputs().at(ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); + const auto &input_to_output_weights_obj = _operands.at(input_to_output_weights_index); + registerCopyInitializer(input_to_output_weights_index, input_to_output_weights_obj); + + const auto &recurrent_to_input_weights_index = + node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); + const auto 
&recurrent_to_input_weights_obj = _operands.at(recurrent_to_input_weights_index); + registerCopyInitializer(recurrent_to_input_weights_index, recurrent_to_input_weights_obj); + + const auto &recurrent_to_forget_weights_index = + node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); + const auto &recurrent_to_forget_weights_obj = _operands.at(recurrent_to_forget_weights_index); + registerCopyInitializer(recurrent_to_forget_weights_index, recurrent_to_forget_weights_obj); + + const auto &recurrent_to_cell_weights_index = + node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); + const auto &recurrent_to_cell_weights_obj = _operands.at(recurrent_to_cell_weights_index); + registerCopyInitializer(recurrent_to_cell_weights_index, recurrent_to_cell_weights_obj); + + const auto &recurrent_to_output_weights_index = + node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); + const auto &recurrent_to_output_weights_obj = _operands.at(recurrent_to_output_weights_index); + registerCopyInitializer(recurrent_to_output_weights_index, recurrent_to_output_weights_obj); + + const auto &cell_to_input_weights_index = + node.getInputs().at(ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); + const auto &cell_to_input_weights_obj = _operands.at(cell_to_input_weights_index); + registerCopyInitializer(cell_to_input_weights_index, cell_to_input_weights_obj); + + const auto &cell_to_forget_weights_index = + node.getInputs().at(ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); + const auto &cell_to_forget_weights_obj = _operands.at(cell_to_forget_weights_index); + registerCopyInitializer(cell_to_forget_weights_index, cell_to_forget_weights_obj); + + const auto &cell_to_output_weights_index = + node.getInputs().at(ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); + const auto &cell_to_output_weights_obj = _operands.at(cell_to_output_weights_index); + registerCopyInitializer(cell_to_output_weights_index, cell_to_output_weights_obj); + + const auto 
&input_gate_bias_index = node.getInputs().at(ir::operation::LSTM::INPUT_GATE_BIAS); + const auto &input_gate_bias_obj = _operands.at(input_gate_bias_index); + registerCopyInitializer(input_gate_bias_index, input_gate_bias_obj); + + const auto &forget_gate_bias_index = node.getInputs().at(ir::operation::LSTM::FORGET_GATE_BIAS); + const auto &forget_gate_bias_obj = _operands.at(forget_gate_bias_index); + registerCopyInitializer(forget_gate_bias_index, forget_gate_bias_obj); + + const auto &output_gate_bias_index = node.getInputs().at(ir::operation::LSTM::OUTPUT_GATE_BIAS); + const auto &output_gate_bias_obj = _operands.at(output_gate_bias_index); + registerCopyInitializer(output_gate_bias_index, output_gate_bias_obj); + + const auto &projection_weights_index = + node.getInputs().at(ir::operation::LSTM::PROJECTION_WEIGHTS); + const auto &projection_weights_obj = _operands.at(projection_weights_index); + registerCopyInitializer(projection_weights_index, projection_weights_obj); + + const auto &projection_bias_index = node.getInputs().at(ir::operation::LSTM::PROJECTION_BIAS); + const auto &projection_bias_obj = _operands.at(projection_bias_index); + registerCopyInitializer(projection_bias_index, projection_bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::RNN &node) +{ + const auto &weights_index = node.getInputs().at(ir::operation::RNN::WEIGHTS); + const auto &weights_obj = _operands.at(weights_index); + registerCopyInitializer(weights_index, weights_obj); + + const auto &recurrent_weights_index = node.getInputs().at(ir::operation::RNN::RECURRENT_WEIGHTS); + const auto &recurrent_weights_obj = _operands.at(recurrent_weights_index); + registerCopyInitializer(recurrent_weights_index, recurrent_weights_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::RNN::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const 
ir::operation::SpaceToBatchND &node)
{
  const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
  const auto &block_size_obj = _operands.at(block_size_index);

  if (block_size_obj.isConstant())
  {
    // Copy the 1-D block-size vector into the backend tensor in reversed
    // element order (presumably to match the backend's axis ordering —
    // TODO confirm against acl_common's axis conversion)
    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) {
      const auto &shape = model_obj.shape();
      const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base());
      assert(model_obj.shape().rank() == 1);
      obj.access([&](::neurun::backend::operand::ITensor &tensor) {
        for (size_t i = 0; i < shape.num_elements(); ++i)
        {
          const int32_t value = base[shape.num_elements() - i - 1];
          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
          *into = value;
        }
      });
    };
  }

  const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
  const auto &paddings_obj = _operands.at(paddings_index);
  if (paddings_obj.isConstant())
  {
    // Copy the 2x2 paddings matrix; rows (spatial dims) are written in
    // reversed order, see the coordinate note below
    _init_map[paddings_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) {
      const auto &shape = model_obj.shape();
      const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base());
      assert(model_obj.shape().rank() == 2);
      assert(shape.dim(0) == 2);
      assert(shape.dim(1) == 2);
      obj.access([&](::neurun::backend::operand::ITensor &tensor) {
        for (auto i = 0; i < shape.dim(0); ++i)
        {
          for (auto j = 0; j < shape.dim(1); ++j)
          {
            const int32_t value = base[i * 2 + j];
            int32_t *into = reinterpret_cast<int32_t *>(
                // The coordinates of NETensor are different from the coordinates of CLTensor in
                // this operand.
+ // NEON : {j, reversed i} + // CL : {reversed i, j} + tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1})); + *into = value; + } + } + }); + }; + } +} + +void ConstantInitializer::visit(const ir::operation::TransposeConv &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + registerPermuteInitializer(kernel_index, kernel_obj); +} + +} // namespace acl_neon +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_neon/ConstantInitializer.h b/runtime/neurun/backend/acl_neon/ConstantInitializer.h new file mode 100644 index 000000000..0f2b2d05b --- /dev/null +++ b/runtime/neurun/backend/acl_neon/ConstantInitializer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef __NEURUN_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
#define __NEURUN_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__

#include <backend/IConstantInitializer.h>
#include <ir/Operands.h>
#include "TensorBuilder.h"

namespace neurun
{
namespace backend
{
namespace acl_neon
{

// Registers initializer functions for the constant operands (weights, biases,
// block sizes, paddings, ...) of the operations visited below, for the ACL
// NEON backend.
class ConstantInitializer : public IConstantInitializer
{
public:
  ConstantInitializer(const ir::Operands &operands,
                      const std::shared_ptr<TensorBuilder> &tensor_builder);

public:
  void visit(const ir::operation::BatchToSpaceND &) override;
  void visit(const ir::operation::Conv2D &) override;
  void visit(const ir::operation::DepthwiseConv2D &) override;
  void visit(const ir::operation::FullyConnected &) override;
  void visit(const ir::operation::LSTM &) override;
  void visit(const ir::operation::RNN &) override;
  void visit(const ir::operation::SpaceToBatchND &) override;
  void visit(const ir::operation::TransposeConv &) override;

private:
  // Accessors required by the IConstantInitializer base class
  const ir::Operands &operands() const override { return _operands; }
  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }

private:
  const ir::Operands &_operands; // model operands (not owned)
  std::shared_ptr<TensorBuilder> _tensor_builder;
};

} // namespace acl_neon
} // namespace backend
} // namespace neurun

#endif // __NEURUN_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__

diff --git a/runtime/neurun/backend/acl_neon/KernelGenerator.cc b/runtime/neurun/backend/acl_neon/KernelGenerator.cc
new file mode 100644
index 000000000..85c6a0633
--- /dev/null
+++ b/runtime/neurun/backend/acl_neon/KernelGenerator.cc
@@ -0,0 +1,2152 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "KernelGenerator.h"

#include <arm_compute/runtime/NEON/NEFunctions.h>   // Include all ARM Compute NEON functions
#include <arm_compute/runtime/NEON/NEFunctionsEx.h> // Include all ARM Compute EX NEON functions

#include <Convert.h>
#include <Swizzle.h>

#include "util/Padding.h"
#include "ir/Index.h"
#include "ir/DataType.h"
#include "ir/InternalType.h"
#include "compiler/IExecutionBuilder.h"
#include "exec/NopFunction.h"
#include "util/logging.h"
#include "util/Utils.h"

using ::neurun::compiler::IExecutionBuilder;

namespace neurun
{
namespace backend
{
namespace acl_neon
{

using ::neurun::backend::acl_common::asAclFunction;

//
// ActivationBuilder
//
// Helper that appends a fused activation (ReLU family) to the execution
// builder after an operation's main kernel has been appended.
class ActivationBuilder
{
public:
  ActivationBuilder(IExecutionBuilder &builder) : _builder(builder)
  {
    // DO NOTHING
  }

private:
  void appendReLU(::arm_compute::ITensor *ifm_alloc);
  void appendReLU1(::arm_compute::ITensor *ifm_alloc);
  void appendReLU6(::arm_compute::ITensor *ifm_alloc);

public:
  // Append the activation kernel for `act` (no-op for Activation::NONE,
  // throws for unsupported activations)
  void append(ir::Activation act, ::arm_compute::ITensor *ifm_alloc);

private:
  IExecutionBuilder &_builder;
};

// Append an unbounded ReLU on `ifm_alloc`. The output argument of
// configure() is nullptr, i.e. the activation is applied on the input
// tensor itself.
void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc)
{
  const ::arm_compute::ActivationLayerInfo act_info{
      ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};

  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();

  fn->configure(ifm_alloc, nullptr, act_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _builder.append(std::move(acl_fn));
}

void
ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc) +{ + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(ifm_alloc, nullptr, act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _builder.append(std::move(acl_fn)); +} + +void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc) +{ + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(ifm_alloc, nullptr, act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _builder.append(std::move(acl_fn)); +} + +void ActivationBuilder::append(ir::Activation act, ::arm_compute::ITensor *ifm_alloc) +{ + switch (act) + { + case ir::Activation::NONE: + { + // DO NOTHING + break; + } + case ir::Activation::RELU: + { + appendReLU(ifm_alloc); + break; + } + case ir::Activation::RELU1: + { + appendReLU1(ifm_alloc); + break; + } + case ir::Activation::RELU6: + { + appendReLU6(ifm_alloc); + break; + } + default: + { + throw std::runtime_error("Not supported, yet"); + } + } +} + +// +// KernelGenerator +// +KernelGenerator::KernelGenerator(const ir::Operands &ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _ctx(ctx), _tensor_builder(tensor_builder), _current_subg_layout(ir::Layout::UNKNOWN) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + _current_subg_layout = op_seq.getLayout(); + for (const auto &e : op_seq.operations()) + { + const auto &node = *(e.node); + _tensor_builder->preVisit(node); + node.accept(*this); + _tensor_builder->postVisit(node); + } +} + +void KernelGenerator::visit(const ir::operation::Abs &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const 
auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ArgMax &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + + const auto ifm_rank = node.param().rank; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto frontend_layout = _current_subg_layout; + auto backend_layout = ifm_alloc->layout(); + + int axis_value = node.param().axis; + if (axis_value < 0) + { + axis_value += ifm_rank; + } + assert(axis_value >= 0 && axis_value < ifm_rank); + const auto fixed_axis = + acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); + + // auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>(); + + // NOTE + // if (ofm_alloc->info()->data_type() == arm_compute::DataType::S32) + //{ + ofm_alloc->info()->set_data_type(arm_compute::DataType::U32); + //} + fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle()); + // fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(), + // arm_compute::ReductionOperation::ARG_IDX_MAX); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto 
ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; + const auto block_size_index{ + node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto block_size_alloc = _tensor_builder->at(block_size_index).get(); + + assert(_ctx.at(block_size_index).isConstant()); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEBatchToSpaceLayer>(); + + fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Cast &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NECast>(); + + auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8 + ? 
arm_compute::SubDataType::BOOL + : arm_compute::SubDataType::NONE; + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto stride = node.param().stride; + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ker_alloc = _tensor_builder->at(ker_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + + const auto conv_info = acl_common::asPadStrideInfo(padding, stride); + const auto act_info = acl_common::asActivationLayerInfo(activation); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEConvolutionLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(), + conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void 
KernelGenerator::visit(const ir::operation::DepthToSpace &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + + auto block_size = node.param().block_size; + assert(block_size > 0); + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthToSpaceLayerEx>(); + + fn->configure(input_alloc->handle(), output_alloc->handle(), block_size); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) +{ + using ir::operation::DepthwiseConv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
+ const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto stride = node.param().stride; + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const auto multiplier = node.param().multiplier; + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ker_alloc = _tensor_builder->at(ker_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + + const auto conv_info = acl_common::asPadStrideInfo(padding, stride); + const auto act_info = acl_common::asActivationLayerInfo(activation); + + if (ker_height == 3 && ker_width == 3) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer3x3>(); + + fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), + ofm_alloc->handle(), conv_info, multiplier, act_info); + + _execution_builder->append(asAclFunction(std::move(fn))); + } + else + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); + + fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), + ofm_alloc->handle(), conv_info, multiplier, act_info); + + _execution_builder->append(asAclFunction(std::move(fn))); + } +} + +void KernelGenerator::visit(const ir::operation::Dequantize &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDequantizationLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + 
_execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::MaxPool2D &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl; + VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl; + VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl; + VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl; + VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl; + VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl; + VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl; + VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; + VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl; + VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl; + VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl; + VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX, + ::arm_compute::Size2D{kw, kh}, + acl_common::asPadStrideInfo(padding, stride)}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append((std::move(acl_fn))); + + 
ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Mean &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)}; + const auto &axes{node.param().axes}; + const auto keep_dims{node.param().keep_dims}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + + // Convert to ACL axes taking into account negative values and possible duplicates. + std::set<std::uint32_t> acl_axes; + const int ifm_rank = node.param().rank; + for (int axis : axes) + { + if (axis < 0) + axis += ifm_rank; + acl_axes.insert( + acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value()); + } + + arm_compute::Coordinates fixed_axis; + for (const auto axis : acl_axes) + { + fixed_axis.set(fixed_axis.num_dimensions(), axis); + } + + // NOTE NEReduceMean has a bug that does not support NHWC layout + // NEReduceMean intermediate tensors are always NCHW layout + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceMeanEx>(); + + fn->configure(ifm_alloc->handle(), fixed_axis, keep_dims, ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::AvgPool2D &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto padding = + 
neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl; + VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl; + VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl; + VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl; + VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl; + VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl; + VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl; + VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; + VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl; + VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl; + VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl; + VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{ + ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh}, + acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append((std::move(acl_fn))); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Concat &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + std::vector<ir::OperandIndex> input_indexes; + for (const auto &input : node.getInputs()) + input_indexes.emplace_back(input); + + const auto axis = node.param().axis; + + // If tensor allocator allocate as subtensor + bool canEliminate = true; + for (auto ifm_ind : input_indexes) + { + if (!_tensor_builder->isSubTensorOf(ofm_index, 
ifm_ind)) + { + canEliminate = false; + break; + } + } + if (canEliminate) + { + // If concat eliminated, return a NOP IFunction + _execution_builder->append(nnfw::cpp14::make_unique<exec::NopFunction>()); + return; + } + + auto output_alloc = _tensor_builder->at(ofm_index).get(); + std::vector<::arm_compute::ITensor *> input_tensors; + for (const auto &ifm_ind : input_indexes) + input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); + + std::unique_ptr<::arm_compute::IFunction> fn; + if (input_indexes.size() < 2) + { + auto l = nnfw::cpp14::make_unique<::arm_compute::NECopy>(); + l->configure(input_tensors.at(0), output_alloc->handle()); + fn = std::move(l); + } + else + { + auto l = nnfw::cpp14::make_unique<::arm_compute::NEConcatenateLayer>(); + const auto rank = node.param().rank; + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = output_alloc->layout(); + const auto fixed_axis = + acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); + l->configure(input_tensors, output_alloc->handle(), fixed_axis); + fn = std::move(l); + } + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto lookups_alloc = _tensor_builder->at(lookups_index).get(); + auto values_alloc = _tensor_builder->at(values_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEEmbeddingLookup>(); + + fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + 
_execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Floor &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEFloor>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + + const auto input_rank = _ctx.at(input_index).shape().rank(); + + const auto output_size = + _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1); + UNUSED_RELEASE(output_size); + assert(_ctx.at(bias_index).shape().dim(0) == output_size); + assert(_ctx.at(weight_index).shape().dim(0) == output_size); + const auto batch_size = + _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2); + const auto input_size = + _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1); + + // Check for reshaping input's shape into rank-2 + bool needs_reshape = false; + ir::Shape reshape(2); + if (input_rank == 3 || input_rank == 4) + { + const auto &ifm_shape = _ctx.at(input_index).shape(); + auto feature_size = 1; + for (int i = 0; i < ifm_shape.rank(); ++i) + { + feature_size *= ifm_shape.dim(i); + } + + UNUSED_RELEASE(feature_size); + assert(feature_size == batch_size * input_size); + + // for reshaping + needs_reshape = true; + 
reshape.dim(0) = batch_size; /* H */ + reshape.dim(1) = input_size; /* W */ + } + + const auto activation = node.param().activation; + + auto output_alloc = _tensor_builder->at(output_index).get(); + const auto input_alloc = _tensor_builder->at(input_index).get(); + const auto weight_alloc = _tensor_builder->at(weight_index).get(); + const auto bias_alloc = _tensor_builder->at(bias_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto acl_layout = output_alloc->handle()->info()->data_layout(); + + auto fn = nnfw::cpp14::make_unique<arm_compute::NEFullyConnectedReshapingLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type = + _ctx.at(weight_index).isConstant() + ? arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS + : arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL; + + fn->configure( + input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(), + needs_reshape, + ::neurun::backend::acl_common::asTensorShape( + reshape, frontend_layout, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout)), + kernel_type); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, output_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::HashtableLookup &node) +{ + const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)}; + const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)}; + + const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)}; + const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; + const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; + + auto output_alloc = 
_tensor_builder->at(output_index).get(); + auto hits_alloc = _tensor_builder->at(hits_index).get(); + + auto lookups_alloc = _tensor_builder->at(lookups_index).get(); + auto keys_alloc = _tensor_builder->at(keys_index).get(); + auto values_alloc = _tensor_builder->at(values_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEHashtableLookup>(); + + fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(), + output_alloc->handle(), hits_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Gather &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape(); + + const auto ifm_rank = node.param().rank; + const auto axis_raw = node.param().axis; + const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw); + // Converting in reverse order + const int axis = ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto indices_alloc = _tensor_builder->at(indices_index).get(); + const auto backend_layout = ofm_alloc->layout(); + UNUSED_RELEASE(backend_layout); + + // NOTE The frontend layout and backend layout must be the same for this operation. + // If not the same, we have to add a stage(?) to perform permutation of output tensor. It + // is not not efficient even if it works well. If so, it would be better to set the + // layout of these backend tensors to the same layout. + // There is also one thing we have to think about. This operation depends on the layout of + // a model. 
For example, if a model in NHWC has this operation as output rank == 4, indices + // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W + // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. + assert(backend_layout == ifm_alloc->layout()); + assert(backend_layout == indices_alloc->layout()); + assert(ifm_rank < 4 || _current_subg_layout == backend_layout); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEGatherEx>(); + + fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::InstanceNorm &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; + const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; + const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto gamma_alloc = _tensor_builder->at(gamma_index).get(); + auto beta_alloc = _tensor_builder->at(beta_index).get(); + auto epsilon = node.param().epsilon; + auto activation = node.param().activation; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(), + beta_alloc->handle(), epsilon); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::L2Normalization &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto 
ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)}; + + // {CL|Neon}L2Normalization performs the reduction only along dimension 0 + // L2 Normalization always performs the reduction along the depth axis + // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by + // choosing normalization parameters as below + + const auto &ifm_shape = _ctx.at(ifm_index).shape(); + // TODO Support optional constant dimension that normalization would be performed on + const auto normalization_axis = node.param().rank - 1; + int32_t radius = + 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1 + float alpha = 1.0f; // In the implementation to make alpha_ become 1 + float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) + float bias = 0.0f; // Don't offset the reduction. + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, + radius, alpha, beta, bias, false); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::L2Pool2D &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + + uint32_t kw = node.param().kw; + uint32_t kh = node.param().kh; + const auto stride = node.param().stride; + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto 
activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{ + ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh}, + ::neurun::backend::acl_common::asPadStrideInfo(padding, stride)}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{ + node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)}; + + auto radius = node.param().radius; + auto alpha = node.param().alpha; + auto beta = node.param().beta; + auto bias = node.param().bias; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const auto norm_info = ::arm_compute::NormalizationLayerInfo( + ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LogicalAnd &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input0_alloc = _tensor_builder->at(input0_index).get(); + auto 
input1_alloc = _tensor_builder->at(input1_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NELogicalAnd>(); + + fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LogicalNot &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEBitwiseNot>(); + + fn->configure(input_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LogicalOr &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input0_alloc = _tensor_builder->at(input0_index).get(); + auto input1_alloc = _tensor_builder->at(input1_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NELogicalOr>(); + + fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Logistic &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const 
::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::LSTM &node) +{ + // TODO Support dynamic rnn + // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. + const auto scratch_buffer_index{ + node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; + const auto output_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; + const auto cell_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; + const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; + + const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; + const auto input_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional + const auto input_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; + const auto input_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; + const auto input_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + const auto recurrent_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional + const auto recurrent_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; + const auto recurrent_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; + const auto recurrent_to_output_weights_index{ + 
node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + const auto cell_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional + const auto cell_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional + const auto cell_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional + const auto input_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; + const auto forget_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; + const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; + const auto output_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; + const auto projection_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional + const auto projection_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional + const auto output_state_in_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; + const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; + const auto cell_threshold = node.param().cell_threshold; + const auto projection_threshold = node.param().projection_threshold; + + bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; + bool has_recurrent_to_input_weights = + _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; + bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; + bool 
has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && + _ctx.at(projection_weights_index).shape().dim(1) != 0; + bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); + + // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG. + // true: no CIFG + // false: CIFG + // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG). + bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; + + // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole. + // But the cell_to_input_weights does not exist in regular CIFG although peephole. + // true: peephole + // false: no peephole + bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; + + // NOTE Although the projection weights has data the projection bias may not have data. + bool has_projection_param = has_projection_weights; + + const auto activation = node.param().activation; + const auto cell_clip = cell_threshold; + const auto projection_clip = projection_threshold; + assert(cell_clip >= 0.f && projection_clip >= 0.f); + + auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get(); + auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get(); + auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get(); + auto output_alloc = _tensor_builder->at(output_index).get(); + + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get(); + auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get(); + auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get(); + auto recurrent_to_forget_weights_alloc = + _tensor_builder->at(recurrent_to_forget_weights_index).get(); + auto recurrent_to_cell_weights_alloc = 
_tensor_builder->at(recurrent_to_cell_weights_index).get(); + auto recurrent_to_output_weights_alloc = + _tensor_builder->at(recurrent_to_output_weights_index).get(); + + auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get(); + auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get(); + auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get(); + auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get(); + auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get(); + + auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NELSTMLayer>(); + + ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{}; + if (has_cifg_param) + { + auto input_to_input_weights_alloc = + _tensor_builder->at(input_to_input_weights_index).get(); // optional + auto recurrent_to_input_weights_alloc = + _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional + auto cell_to_input_weights_handle = + has_peephole_param ? 
_tensor_builder->at(cell_to_input_weights_index).get()->handle() + : nullptr; // optional (non-cifg && peephole) + auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional + lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(), + recurrent_to_input_weights_alloc->handle(), + cell_to_input_weights_handle, input_gate_bias_alloc->handle()); + } + if (has_peephole_param) + { + auto cell_to_forget_weights_alloc = + _tensor_builder->at(cell_to_forget_weights_index).get(); // optional + auto cell_to_output_weights_alloc = + _tensor_builder->at(cell_to_output_weights_index).get(); // optional + lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(), + cell_to_output_weights_alloc->handle()); + } + if (has_projection_param) + { + auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional + auto projection_bias_handle = has_projection_bias + ? _tensor_builder->at(projection_bias_index).get()->handle() + : nullptr; // optional + lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle); + } + + fn->configure( + input_alloc->handle(), input_to_forget_weights_alloc->handle(), + input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(), + recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(), + recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(), + cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(), + cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(), + output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(), + lstm_params, act_info, cell_clip, projection_clip); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Mul &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + 
const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPixelWiseMultiplication>(); + + // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO + fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const ir::operation::Neg &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NENegLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Pack &node) +{ + const auto output_index{node.getOutputs().at(0)}; + auto axis{node.param().axis}; + + const auto output_rank = node.param().rank; + + std::vector<ir::OperandIndex> input_indexes; + for (const auto &input_index : node.getInputs()) + input_indexes.emplace_back(input_index); + + auto output = _tensor_builder->at(output_index).get()->handle(); + std::vector<arm_compute::ITensor *> inputs; + for (const auto &input_index : input_indexes) + 
inputs.emplace_back(_tensor_builder->at(input_index)->handle()); + + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = _tensor_builder->at(output_index).get()->layout(); + + if (axis < 0) + axis += output_rank; + axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEStackLayer>(); + + fn->configure(inputs, axis, output); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::Pad &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; + const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)}; + const auto output_index{node.getOutputs().at(0)}; + assert(_ctx.at(pad_index).isConstant()); + + auto rank = node.param().rank; + auto pad_base = _ctx.at(pad_index).data().base(); + + auto input = _tensor_builder->at(input_index).get()->handle(); + auto output = _tensor_builder->at(output_index).get()->handle(); + + ::arm_compute::PaddingList padding_list; + padding_list.resize(rank); + for (int32_t n = 0; n < rank; ++n) + { + const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2); + + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto axis = + acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); + padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]}; + } + + const auto input_type = _ctx.at(input_index).typeInfo(); + UNUSED_RELEASE(input_type); + assert(input->info()->data_type() == acl_common::asDataType(input_type.type())); + assert(input->info()->quantization_info() == + ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset())); + const auto pixel_value = + ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info()); + + auto fn = 
nnfw::cpp14::make_unique<::arm_compute::NEPadLayer>(); + fn->configure(input, output, padding_list, pixel_value); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::Permute &node) +{ + const auto ofm_idx{node.getOutputs().at(0)}; + const auto ifm_idx{node.getInputs().at(0)}; + const auto permute_type = node.getPermuteType(); + auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); + auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + const auto rank = _ctx.at(ofm_idx).shape().rank(); + assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank()); + + std::unique_ptr<::arm_compute::IFunction> fn; + arm_compute::PermutationVector pv; + if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4) + { + // WHCN -> CWHN + pv = arm_compute::PermutationVector{2, 0, 1}; + + auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + + fn = std::move(l); + } + else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4) + { + // CWHN -> WHCN + pv = arm_compute::PermutationVector{1, 2, 0}; + + auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + + fn = std::move(l); + } + else + { + auto l = nnfw::cpp14::make_unique<::arm_compute::NECopy>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + } + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::PReLU &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; + const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); 
+ auto alpha_alloc = _tensor_builder->at(alpha_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::NEPReLU>(); + + l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReduceMax &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)}; + const auto &axes{node.param().axes}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + + // Convert to ACL axes taking into account negative values and possible duplicates. + std::set<std::uint32_t> acl_axes; + const int ifm_rank = node.param().rank; + for (int axis : axes) + { + if (axis < 0) + axis += ifm_rank; + acl_axes.insert( + acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value()); + } + + arm_compute::Coordinates reduce_axes; + for (const auto axis : acl_axes) + { + reduce_axes.set(reduce_axes.num_dimensions(), axis); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceOperation>(); + + fn->configure(ifm_alloc->handle(), reduce_axes, false, ofm_alloc->handle(), + ::arm_compute::ReduceOperation::MAX); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReduceMin &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)}; + const auto &axes{node.param().axes}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + const 
auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + + // Convert to ACL axes taking into account negative values and possible duplicates. + std::set<std::uint32_t> acl_axes; + const int ifm_rank = node.param().rank; + for (int axis : axes) + { + if (axis < 0) + axis += ifm_rank; + acl_axes.insert( + acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value()); + } + + arm_compute::Coordinates reduce_axes; + for (const auto axis : acl_axes) + { + reduce_axes.set(reduce_axes.num_dimensions(), axis); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceOperation>(); + + fn->configure(ifm_alloc->handle(), reduce_axes, false, ofm_alloc->handle(), + ::arm_compute::ReduceOperation::MIN); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReduceSum &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)}; + const auto &axes{node.param().axes}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = input_alloc->layout(); + + // Convert to ACL axes taking into account negative values and possible duplicates. 
+ std::set<std::uint32_t> acl_axes; + const int input_rank = node.param().rank; + for (int axis : axes) + { + if (axis < 0) + axis += input_rank; + acl_axes.insert( + acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value()); + } + + arm_compute::Coordinates fixed_axes; + for (const auto axis : acl_axes) + { + fixed_axes.set(fixed_axes.num_dimensions(), axis); + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceSum>(); + + fn->configure(input_alloc->handle(), fixed_axes, false, output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReLU &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<arm_compute::NEActivationLayer>(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReLU1 &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + auto acl_fn = 
asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ReLU6 &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Reshape &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + // NOTE This operation must not be changed the layout from frontend to backend + // So, PermutationOperationPass makes layouts of frontend and backend the same. 
+ const auto frontend_layout = _current_subg_layout; + const auto backend_layout = output_alloc->layout(); + assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || + frontend_layout == backend_layout); + UNUSED_RELEASE(frontend_layout); + UNUSED_RELEASE(backend_layout); + + auto fn = nnfw::cpp14::make_unique<arm_compute::NEReshapeLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEScale>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), + ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, + ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::RNN &node) +{ + const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)}; + const auto hidden_state_out_index{ + node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)}; + + const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)}; + const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)}; + const auto recurrent_weights_index{ + node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)}; + const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)}; + const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)}; + + const 
auto activation = node.param().activation; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get(); + + auto input_alloc = _tensor_builder->at(input_index).get(); + auto weights_alloc = _tensor_builder->at(weights_index).get(); + auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get(); + auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation); + + auto copy_layer = nnfw::cpp14::make_unique<::arm_compute::NECopy>(); + copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle()); + _execution_builder->append(asAclFunction(std::move(copy_layer))); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NERNNLayerEx>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(), + bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(), + act_info); + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::RSQRT &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NERsqrtLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const ir::operation::Squeeze &node) +{ + // Squeeze is identical to reshape except that it has an optional dimensions input. 
+ // In addition, optional dims_index is ignored since output tensor already has squeezed shape + // by freezer and toco + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; + const auto dims{node.param().dims}; + const auto ndim{node.param().ndim}; + (void)dims; + (void)ndim; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + auto fn = nnfw::cpp14::make_unique<arm_compute::NEReshapeLayer>(); + fn->configure(input_alloc->handle(), output_alloc->handle()); + auto acl_fn = asAclFunction(std::move(fn)); + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Tanh &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<arm_compute::NEActivationLayer>(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Softmax &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)}; + const auto beta = node.param().beta; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NESoftmaxLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + fn->configure(input_alloc->handle(), 
output_alloc->handle(), beta); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; + const auto block_size_index{ + node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; + const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto block_size_alloc = _tensor_builder->at(block_size_index).get(); + auto paddings_alloc = _tensor_builder->at(paddings_index).get(); + + assert(_ctx.at(block_size_index).isConstant()); + assert(_ctx.at(paddings_index).isConstant()); + + // NESpaceToBatchLayer has a bug that padding's values are 0 even when zero point of QASYMM8 is + // not 0. 
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NESpaceToBatchLayerEx>(); + + fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(), + ofm_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; + + auto block_size = node.param().block_size; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NESpaceToDepthLayerEx>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const ir::operation::Split &node) +{ + // TODO Support this op by SubTensor + const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)}; + + assert(node.param().num_splits == static_cast<int>(node.getOutputs().size())); + + const auto ifm_rank = node.param().rank; + std::vector<ir::OperandIndex> output_indexes; + for (const auto &output : node.getOutputs()) + output_indexes.emplace_back(output); + + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + std::vector<arm_compute::ITensor *> output_allocs; + for (const auto &ofm_ind : output_indexes) + output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = ifm_alloc->layout(); + auto axis = node.param().axis; + if (axis < 0) + axis += ifm_rank; + axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::NESplit>(); + + fn->configure(ifm_alloc->handle(), 
output_allocs, axis);
+
+  _execution_builder->append(asAclFunction(std::move(fn)));
+}
+
+// SQRT: elementwise square root, mapped onto NEActivationLayer with the SQRT activation.
+void KernelGenerator::visit(const ir::operation::SQRT &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  const ::arm_compute::ActivationLayerInfo act_info{
+      ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+  fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// SquaredDifference: (lhs - rhs)^2 via NEElementwiseSquaredDiff.
+void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+
+  fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// Sub: saturating elementwise subtraction; the fused activation (if any) is
+// appended as a separate kernel by ActivationBuilder afterwards.
+void KernelGenerator::visit(const ir::operation::Sub &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticSubtraction>();
+
+  fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+                arm_compute::ConvertPolicy::SATURATE);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+
+  ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
+}
+
+// Slice: BEGINS/SIZES are constant operands; each axis is permuted with
+// ToARMComputeAxis for the backend layout and sizes are converted to end
+// coordinates (end = begin + size) as NESlice expects.
+void KernelGenerator::visit(const ir::operation::Slice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
+  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
+
+  auto outputData_alloc = _tensor_builder->at(output_index).get();
+  auto inputData_alloc = _tensor_builder->at(input_index).get();
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = inputData_alloc->layout();
+
+  // Set initializers for indices data such as order of inputData
+  int input_rank = node.param().rank;
+  std::vector<int32_t> starts;
+  std::vector<int32_t> ends;
+  starts.resize(input_rank, 0);
+  ends.resize(input_rank, 0);
+  {
+    auto beginData_base = _ctx.at(begins_index).data().base();
+    auto sizeData_base = _ctx.at(sizes_index).data().base();
+    const int beginData_size = _ctx.at(begins_index).shape().num_elements();
+    const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
+
+    using ir::DataType;
+
+    UNUSED_RELEASE(beginData_size);
+    UNUSED_RELEASE(sizeData_size);
+
+    assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
+    assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
+    assert(beginData_size == input_rank);
+    assert(sizeData_size == input_rank);
+
+    assert(beginData_base != nullptr);
+    for (int n = 0; n < input_rank; ++n)
+    {
+      auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
+                                                                 backend_layout)
+                      .value();
+
+      int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
+      starts[axis] = begin_value;
+
+      int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
+      ends[axis] = begin_value + size_value;
+    }
+  }
+
+  ::arm_compute::Coordinates starts_set;
+  ::arm_compute::Coordinates ends_set;
+
+  for (size_t i = 0; i < starts.size(); ++i)
+  {
+    starts_set.set(i, starts[i]);
+    ends_set.set(i, ends[i]);
+  }
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NESlice>();
+
+  fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// StridedSlice: STARTS/ENDS/STRIDES constants are permuted per-axis for the
+// backend layout, and the begin/end/shrink masks are bit-reordered to match.
+void KernelGenerator::visit(const ir::operation::StridedSlice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+
+  auto outputData_alloc = _tensor_builder->at(output_index).get();
+  auto inputData_alloc = _tensor_builder->at(input_index).get();
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = inputData_alloc->layout();
+
+  // Set initializers for indices data such as order of inputData
+  int input_rank = node.param().rank;
+  std::vector<int32_t> starts;
+  std::vector<int32_t> ends;
+  std::vector<int32_t> strides;
+  starts.resize(input_rank, 0);
+  ends.resize(input_rank, 0);
+  strides.resize(input_rank, 0);
+  {
+    auto startData_base = _ctx.at(starts_index).data().base();
+    auto endData_base = _ctx.at(ends_index).data().base();
+    auto stridesData_base = _ctx.at(strides_index).data().base();
+    const int startData_size = _ctx.at(starts_index).shape().num_elements();
+    const int endData_size = _ctx.at(ends_index).shape().num_elements();
+    const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
+
+    using ir::DataType;
+
+    UNUSED_RELEASE(startData_size);
+    UNUSED_RELEASE(endData_size);
+    UNUSED_RELEASE(stridesData_size);
+
+    assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
+    assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
+    assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
+    assert(startData_size == input_rank);
+    assert(endData_size == input_rank);
+    assert(stridesData_size == input_rank);
+
+    assert(startData_base != nullptr);
+    for (int n = 0; n < input_rank; ++n)
+    {
+      auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
+                                                                 backend_layout)
+                      .value();
+
+      int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
+      starts[axis] = start_value;
+
+      int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
+      ends[axis] = end_value;
+
+      int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
+      strides[axis] = strides_value;
+    }
+  }
+
+  // Set mask bits such as order of inputData
+  // FIXME Take the layouts into account.
+  const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
+  const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
+  const auto shrink_axis_mask =
+      acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
+
+  ::arm_compute::Coordinates starts_set;
+  ::arm_compute::Coordinates ends_set;
+  ::arm_compute::BiStrides strides_set;
+
+  for (size_t i = 0; i < starts.size(); ++i)
+  {
+    starts_set.set(i, starts[i]);
+    ends_set.set(i, ends[i]);
+    strides_set.set(i, strides[i]);
+  }
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEStridedSlice>();
+
+  fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
+                strides_set, begin_mask, end_mask, shrink_axis_mask);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// TransposeConv: for VALID padding, computes the "invalid" right/bottom border
+// that NETransposeConvLayer must skip so the output matches the requested shape.
+void KernelGenerator::visit(const ir::operation::TransposeConv &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto output_shape_index{
+      node.getInputs().at(ir::operation::TransposeConv::Input::OUTPUT_SHAPE)};
+  const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
+  const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+  const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
+
+  const auto stride = node.param().stride;
+
+  assert((node.param().padding.type == ir::PaddingType::SAME) ||
+         (node.param().padding.type == ir::PaddingType::VALID));
+  auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
+                                                ker_shape.W, ker_shape.H);
+
+  uint32_t invalid_horizontal = 0;
+  uint32_t invalid_vertical = 0;
+  if (node.param().padding.type == ir::PaddingType::VALID)
+  {
+    invalid_horizontal =
+        ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+    invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
+  }
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+
+  const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NETransposeConvLayer>();
+
+  // No bias tensor (nullptr): this path lowers bias-less transpose convolution.
+  fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
+                invalid_horizontal, invalid_vertical);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// Transpose: rank <= 2 uses the cheap NETranspose; otherwise NEPermute with a
+// permutation vector converted for the backend layout.
+void KernelGenerator::visit(const ir::operation::Transpose &node)
+{
+  const auto ofm_idx{node.getOutputs().at(0)};
+  const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+  const auto &perm{node.param().perm};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
+  const auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = ifm_alloc->layout();
+
+  const auto rank = node.param().rank;
+  std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
+  auto backend_pv = ::neurun::backend::acl_common::getARMComputePermutationVector(
+      rank, pv, frontend_layout, backend_layout);
+
+  std::unique_ptr<::arm_compute::IFunction> fn;
+
+  if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2)
+  {
+    auto l = nnfw::cpp14::make_unique<::arm_compute::NETranspose>();
+
+    l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+    fn = std::move(l);
+  }
+  else
+  {
+    auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>();
+
+    l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+
+    fn = std::move(l);
+  }
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// Unpack: splits the input along `axis` into one output per slice (NEUnstack).
+// A negative axis is normalized before being converted to the ACL axis.
+void KernelGenerator::visit(const ir::operation::Unpack &node)
+{
+  const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+  auto axis{node.param().axis};
+
+  const auto input_rank = node.param().rank;
+
+  std::vector<ir::OperandIndex> output_indexes;
+  for (const auto &output_index : node.getOutputs())
+    output_indexes.emplace_back(output_index);
+
+  auto input = _tensor_builder->at(input_index).get()->handle();
+  std::vector<arm_compute::ITensor *> outputs;
+  for (const auto &output_index : output_indexes)
+    outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+
+  const auto frontend_layout = _current_subg_layout;
+  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+  if (axis < 0)
+    axis += input_rank;
+  axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEUnstack>();
+
+  fn->configure(input, outputs, axis);
+
+  _execution_builder->append(asAclFunction(std::move(fn)));
+}
+
+// Add: saturating elementwise addition; fused activation appended afterwards.
+void KernelGenerator::visit(const ir::operation::Add &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticAddition>();
+
+  fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+                arm_compute::ConvertPolicy::SATURATE);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+
+  ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
+}
+
+// Div: elementwise division; fused activation appended afterwards.
+void KernelGenerator::visit(const ir::operation::Div &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseDivision>();
+
+  fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+
+  ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
+}
+
+// Exp: elementwise exponential (NEExpLayer).
+void KernelGenerator::visit(const ir::operation::Exp &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEExpLayer>();
+
+  fn->configure(input_alloc->handle(), output_alloc->handle());
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// Comparison: elementwise comparison; the IR comparison enum is cast directly
+// to arm_compute::ComparisonOperation (enumerators are assumed to correspond
+// one-to-one -- NOTE(review): worth a static_assert elsewhere to pin this).
+void KernelGenerator::visit(const ir::operation::Comparison &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+  const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+  const auto comparison_type = node.param().comparison_type;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input0_alloc = _tensor_builder->at(input0_index).get();
+  auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseComparison>();
+
+  fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+                (arm_compute::ComparisonOperation)comparison_type);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// Min: elementwise minimum.
+void KernelGenerator::visit(const ir::operation::Min &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseMin>();
+
+  fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+// Max: elementwise maximum.
+void KernelGenerator::visit(const ir::operation::Max &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseMax>();
+
+  fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/acl_neon/KernelGenerator.h b/runtime/neurun/backend/acl_neon/KernelGenerator.h
new file mode 100644
index 000000000..f041fb725
--- /dev/null
+++ 
b/runtime/neurun/backend/acl_neon/KernelGenerator.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
+#define __NEURUN_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
+
+#include <backend/IKernelGenerator.h>
+
+#include "ir/Operands.h"
+#include "TensorBuilder.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+// IR-operation visitor for the acl_neon backend: each visit() lowers one IR
+// operation into an ARM Compute Library NEON kernel and appends it to the
+// execution builder.
+class KernelGenerator : public IKernelGenerator
+{
+public:
+  KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
+
+  // One overload per supported IR operation (see KernelGenerator.cc).
+  void visit(const ir::OpSequence &) override;
+  void visit(const ir::operation::Abs &) override;
+  void visit(const ir::operation::ArgMax &) override;
+  void visit(const ir::operation::BatchToSpaceND &) override;
+  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthToSpace &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::Dequantize &) override;
+  void visit(const ir::operation::MaxPool2D &) override;
+  void visit(const ir::operation::Mean &) override;
+  void visit(const ir::operation::AvgPool2D &) override;
+  void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::EmbeddingLookup &) override;
+  void visit(const ir::operation::Floor &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::Gather &) override;
+  void visit(const ir::operation::HashtableLookup &) override;
+  void visit(const ir::operation::InstanceNorm &) override;
+  void visit(const ir::operation::L2Normalization &) override;
+  void visit(const ir::operation::L2Pool2D &) override;
+  void visit(const ir::operation::LocalResponseNormalization &) override;
+  void visit(const ir::operation::LogicalAnd &) override;
+  void visit(const ir::operation::LogicalNot &) override;
+  void visit(const ir::operation::LogicalOr &) override;
+  void visit(const ir::operation::Logistic &) override;
+  void visit(const ir::operation::LSTM &) override;
+  void visit(const ir::operation::Mul &) override;
+  void visit(const ir::operation::Neg &) override;
+  void visit(const ir::operation::Pack &) override;
+  void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Permute &) override;
+  void visit(const ir::operation::PReLU &) override;
+  void visit(const ir::operation::ReduceMax &) override;
+  void visit(const ir::operation::ReduceMin &) override;
+  void visit(const ir::operation::ReduceSum &) override;
+  void visit(const ir::operation::ReLU &) override;
+  void visit(const ir::operation::ReLU1 &) override;
+  void visit(const ir::operation::ReLU6 &) override;
+  void visit(const ir::operation::Reshape &) override;
+  void visit(const ir::operation::ResizeBilinear &) override;
+  void visit(const ir::operation::RNN &) override;
+  void visit(const ir::operation::RSQRT &) override;
+  void visit(const ir::operation::Squeeze &) override;
+  void visit(const ir::operation::Tanh &) override;
+  void visit(const ir::operation::Softmax &) override;
+  void visit(const ir::operation::SpaceToBatchND &) override;
+  void visit(const ir::operation::SpaceToDepth &) override;
+  void visit(const ir::operation::Split &) override;
+  void visit(const ir::operation::SQRT &) override;
+  void visit(const ir::operation::SquaredDifference &) override;
+  void visit(const ir::operation::Sub &) override;
+  void visit(const ir::operation::Slice &) override;
+  void visit(const ir::operation::StridedSlice &) override;
+  void visit(const ir::operation::TransposeConv &) override;
+  void visit(const ir::operation::Transpose &) override;
+  void visit(const ir::operation::Unpack &) override;
+  void visit(const ir::operation::Add &) override;
+  void visit(const ir::operation::Div &) override;
+  void visit(const ir::operation::Exp &) override;
+  void visit(const ir::operation::Comparison &) override;
+  void visit(const ir::operation::Min &) override;
+  void visit(const ir::operation::Max &) override;
+
+private:
+  const ir::Operands &_ctx;                       // operand table (shapes, types, constant data)
+  std::shared_ptr<TensorBuilder> _tensor_builder; // provides backend tensor allocations
+  ir::Layout _current_subg_layout;                // frontend layout of the op-sequence being lowered
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
diff --git a/runtime/neurun/backend/acl_neon/PluginClassesAllocator.cc b/runtime/neurun/backend/acl_neon/PluginClassesAllocator.cc
new file mode 100644
index 000000000..75f2e9797
--- /dev/null
+++ b/runtime/neurun/backend/acl_neon/PluginClassesAllocator.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include <util/logging.h> + +#include "Backend.h" + +extern "C" { +neurun::backend::Backend *neurun_backend_create() +{ + VERBOSE(neurun_backend_create) << "'acl_neon' loaded\n"; + return new neurun::backend::acl_neon::Backend; +} + +void neurun_backend_destroy(neurun::backend::Backend *backend) +{ + VERBOSE(neurun_backend_create) << "'acl_neon' unloaded\n"; + delete backend; +} +} diff --git a/runtime/neurun/backend/acl_neon/ShapeFixer.cc b/runtime/neurun/backend/acl_neon/ShapeFixer.cc new file mode 100644 index 000000000..1d80e57e9 --- /dev/null +++ b/runtime/neurun/backend/acl_neon/ShapeFixer.cc @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ShapeFixer.h" + +#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h> +#include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h> +#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h> +#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h> +#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h> +#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h> +#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h> +#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h> +#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h> +#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h> +#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h> + +#include <Convert.h> +#include <Swizzle.h> + +#include "util/Padding.h" +#include "ir/Index.h" +#include "compiler/IExecutionBuilder.h" +#include "exec/NopFunction.h" +#include "util/logging.h" +#include "util/Utils.h" + +using ::neurun::compiler::IExecutionBuilder; + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +using ::neurun::backend::acl_common::asAclFunction; + +ShapeFixer::ShapeFixer(const ir::Operands &ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _ctx(ctx), _tensor_builder(tensor_builder) +{ + assert(tensor_builder); +} + +void ShapeFixer::visit(const ir::operation::Abs &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ArgMax &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); +} + +void ShapeFixer::visit(const ir::operation::Cast &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO 
NOTHING */} + +void ShapeFixer::visit(const ir::operation::DepthToSpace &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Dequantize &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Mean &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Concat &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + _tensor_builder->dimCorrection(ofm_index, false); + for (const auto &inputs : node.getInputs()) + _tensor_builder->dimCorrection(inputs, false); +} + +void ShapeFixer::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + _tensor_builder->dimCorrection(values_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::Exp &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Floor &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto input_rank = _ctx.at(input_index).shape().rank(); + // Check for reshaping input's shape into rank-2 + if (input_rank == 3 || input_rank == 4) + _tensor_builder->dimCorrection(input_index, false); +} + +void ShapeFixer::visit(const ir::operation::HashtableLookup &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + _tensor_builder->dimCorrection(values_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void 
ShapeFixer::visit(const ir::operation::Gather &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); + _tensor_builder->dimCorrection(indices_index, false); +} + +void ShapeFixer::visit(const ir::operation::InstanceNorm &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::L2Normalization &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::L2Pool2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::LocalResponseNormalization &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::LogicalAnd &node) +{ + const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; + + if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank()); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::LogicalNot &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::LogicalOr &node) +{ + const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; + + if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::LSTM &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Pack &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + _tensor_builder->dimCorrection(ofm_index, false); + for (const auto &inputs : node.getInputs()) + { + _tensor_builder->dimCorrection(inputs, false); + const auto ofm_rank = _ctx.at(ofm_index).shape().rank(); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(inputs).shape()).extendRank(ofm_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Pad &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + _tensor_builder->dimCorrection(input_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::Mul &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Neg &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::PReLU &node) +{ + const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; + const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; + + if (!(_ctx.at(ifm_index).shape() == _ctx.at(alpha_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(ifm_index).shape().rank(), _ctx.at(alpha_index).shape().rank()); + const_cast<ir::Shape &>(_ctx.at(ifm_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(alpha_index).shape()).extendRank(broadcast_rank); + } +} + +void 
ShapeFixer::visit(const ir::operation::ReduceMax &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReduceMin &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReduceSum &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReLU &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReLU1 &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::ReLU6 &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Reshape &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; + + _tensor_builder->dimCorrection(input_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::ResizeBilinear &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::RNN &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Comparison &node) +{ + const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; + + if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank()); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::RSQRT &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Squeeze &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; + _tensor_builder->dimCorrection(input_index, false); + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::Tanh &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Slice &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::StridedSlice &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); +} + +void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::SpaceToBatchND &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; + _tensor_builder->dimCorrection(ofm_index, false); + _tensor_builder->dimCorrection(ifm_index, false); +} + +void ShapeFixer::visit(const ir::operation::SpaceToDepth &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Split &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)}; + _tensor_builder->dimCorrection(input_index, false); + for (const auto &output : node.getOutputs()) + _tensor_builder->dimCorrection(output, false); +} + +void ShapeFixer::visit(const ir::operation::SQRT &) { /* 
DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::SquaredDifference &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Sub &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::TransposeConv &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Transpose &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Unpack &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)}; + _tensor_builder->dimCorrection(input_index, false); + for (const auto &output_index : node.getOutputs()) + _tensor_builder->dimCorrection(output_index, false); +} + +void ShapeFixer::visit(const ir::operation::Add &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Div &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Min &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +void ShapeFixer::visit(const ir::operation::Max &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; + + if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank()); + + // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or + // a node to extend shape may be inserted in front of this operation + const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank); + const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank); + } +} + +} // namespace acl_neon +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_neon/ShapeFixer.h b/runtime/neurun/backend/acl_neon/ShapeFixer.h new file mode 100644 index 000000000..aa1f8f75a --- /dev/null +++ b/runtime/neurun/backend/acl_neon/ShapeFixer.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_NEON_SHAPE_FIXER_H__ +#define __NEURUN_BACKEND_ACL_NEON_SHAPE_FIXER_H__ + +#include <backend/IShapeFixer.h> + +#include "ir/Operands.h" +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +class ShapeFixer : public IShapeFixer +{ +public: + ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder); + + void visit(const ir::operation::Abs &) override; + void visit(const ir::operation::ArgMax &) override; + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::Cast &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthToSpace &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::Dequantize &) override; + void visit(const ir::operation::MaxPool2D &) override; + void visit(const ir::operation::Mean &) override; + void visit(const ir::operation::AvgPool2D &) override; + void visit(const ir::operation::Concat &) override; + void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::Floor &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::HashtableLookup &) override; + void visit(const ir::operation::InstanceNorm &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::L2Pool2D &) override; + void visit(const ir::operation::LocalResponseNormalization &) override; + void visit(const ir::operation::LogicalAnd &) override; + void visit(const ir::operation::LogicalNot &) override; + void visit(const ir::operation::LogicalOr &) override; + void visit(const ir::operation::Logistic &) override; + void visit(const ir::operation::LSTM &) override; + void visit(const 
ir::operation::Mul &) override; + void visit(const ir::operation::Neg &) override; + void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pad &) override; + void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::PReLU &) override; + void visit(const ir::operation::ReduceMax &) override; + void visit(const ir::operation::ReduceMin &) override; + void visit(const ir::operation::ReduceSum &) override; + void visit(const ir::operation::ReLU &) override; + void visit(const ir::operation::ReLU1 &) override; + void visit(const ir::operation::ReLU6 &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::ResizeBilinear &) override; + void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::RSQRT &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Tanh &) override; + void visit(const ir::operation::Softmax &) override; + void visit(const ir::operation::SpaceToBatchND &) override; + void visit(const ir::operation::SpaceToDepth &) override; + void visit(const ir::operation::Split &) override; + void visit(const ir::operation::SQRT &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Sub &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::TransposeConv &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::Unpack &) override; + void visit(const ir::operation::Add &) override; + void visit(const ir::operation::Div &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Min &) override; + void visit(const ir::operation::Max &) override; + +private: + const ir::Operands &_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // 
namespace acl_neon +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_NEON_SHAPE_FIXER_H__ diff --git a/runtime/neurun/backend/acl_neon/TensorBuilder.h b/runtime/neurun/backend/acl_neon/TensorBuilder.h new file mode 100644 index 000000000..0a6b4921d --- /dev/null +++ b/runtime/neurun/backend/acl_neon/TensorBuilder.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_NEON_TENSOR_BUILDER_H__ +#define __NEURUN_BACKEND_ACL_NEON_TENSOR_BUILDER_H__ + +#include <TemplTensorBuilder.h> + +#include "operand/NETensor.h" +#include "operand/NESubTensor.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +using TensorBuilder = + acl_common::TemplTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>; + +} // namespace acl_neon +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_NEON_TENSOR_BUILDER_H__ diff --git a/runtime/neurun/backend/acl_neon/TensorManager.h b/runtime/neurun/backend/acl_neon/TensorManager.h new file mode 100644 index 000000000..725275cef --- /dev/null +++ b/runtime/neurun/backend/acl_neon/TensorManager.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_NEON_TENSOR_MANAGER_H__ +#define __NEURUN_BACKEND_ACL_NEON_TENSOR_MANAGER_H__ + +#include <arm_compute/runtime/Allocator.h> +#include <arm_compute/runtime/PoolManager.h> +#include <arm_compute/runtime/OffsetLifetimeManager.h> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> +#include <arm_compute/runtime/MemoryGroup.h> + +#include <AclMemoryManager.h> +#include <AclLinearMemoryManager.h> +#include <AclInternalBufferManager.h> +#include <AclTensorManager.h> + +#include "operand/NETensor.h" +#include "operand/NESubTensor.h" + +#include "util/logging.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +using MemoryManager = + acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>; + +using LinearMemoryManager = acl_common::AclLinearMemoryManager< + operand::INETensor, operand::NETensor, operand::NESubTensor, + ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager, + ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator, ::arm_compute::MemoryGroup>; + +using InternalBufferManager = acl_common::AclInternalBufferManager< + ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager, + ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>; + +using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor, + operand::NESubTensor>; 
+ +TensorManager *createTensorManager() +{ + const std::string executor_str = util::getConfigString(util::config::EXECUTOR); + if (executor_str == "Linear") + { + VERBOSE(acl_neon_createTensorManager) << "AclTensorManager as Linear" << std::endl; + return new TensorManager(new MemoryManager(), new LinearMemoryManager(), + new InternalBufferManager()); + } + else + { + VERBOSE(acl_neon_createTensorManager) << "AclTensorManager" << std::endl; + return new TensorManager(new MemoryManager(), new MemoryManager(), new InternalBufferManager()); + } +} + +} // namespace acl_neon +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_NEON_TENSOR_MANAGER_H__ diff --git a/runtime/neurun/backend/acl_neon/TensorRegister.cc b/runtime/neurun/backend/acl_neon/TensorRegister.cc new file mode 100644 index 000000000..fe766cdf9 --- /dev/null +++ b/runtime/neurun/backend/acl_neon/TensorRegister.cc @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TensorRegister.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +// NOTHING + +} // srcn +} // backend +} // neurun diff --git a/runtime/neurun/backend/acl_neon/TensorRegister.h b/runtime/neurun/backend/acl_neon/TensorRegister.h new file mode 100644 index 000000000..115e05dee --- /dev/null +++ b/runtime/neurun/backend/acl_neon/TensorRegister.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_NEON_TENSOR_REGISTER_H__ +#define __NEURUN_BACKEND_ACL_NEON_TENSOR_REGISTER_H__ + +#include <AclTensorRegister.h> +#include <misc/polymorphic_downcast.h> +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ + +class TensorRegister : public acl_common::AclTensorRegister +{ +public: + TensorRegister(const ir::Operands &operands, const std::shared_ptr<TensorBuilder> &tensor_builder) + : acl_common::AclTensorRegister{operands, tensor_builder} + { + // DO NOTHING + } + + void setUsesCount(const ir::OperandIndex &ind, size_t num_uses) const override + { + nnfw::misc::polymorphic_downcast<TensorBuilder *>(tensor_builder().get()) + ->setUsesCount(ind, num_uses); + } +}; + +} // namespace acl_neon +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_NEON_TENSOR_REGISTER_H__ diff --git a/runtime/neurun/backend/acl_neon/operand/INETensor.cc b/runtime/neurun/backend/acl_neon/operand/INETensor.cc new file mode 100644 index 000000000..fdb20970d --- /dev/null +++ b/runtime/neurun/backend/acl_neon/operand/INETensor.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "INETensor.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ +namespace operand +{ + +void INETensor::access(const std::function<void(ITensor &tensor)> &fn) { fn(*this); } + +} // namespace operand +} // namespace acl_neon +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_neon/operand/INETensor.h b/runtime/neurun/backend/acl_neon/operand/INETensor.h new file mode 100644 index 000000000..22b1140cf --- /dev/null +++ b/runtime/neurun/backend/acl_neon/operand/INETensor.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__ +#define __NEURUN_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__ + +#include <arm_compute/core/ITensor.h> + +#include <IACLTensor.h> + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ +namespace operand +{ + +class INETensor : public acl_common::IACLTensor +{ +public: + const arm_compute::ITensor *handle() const override = 0; + arm_compute::ITensor *handle() override = 0; + void access(const std::function<void(ITensor &tensor)> &fn) final; +}; + +} // namespace operand +} // namespace acl_neon +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__ diff --git a/runtime/neurun/backend/acl_neon/operand/NESubTensor.cc b/runtime/neurun/backend/acl_neon/operand/NESubTensor.cc new file mode 100644 index 000000000..a36af609c --- /dev/null +++ b/runtime/neurun/backend/acl_neon/operand/NESubTensor.cc @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NESubTensor.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ +namespace operand +{ + +NESubTensor::NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape, + const arm_compute::Coordinates &coords, size_t rank, bool extend_parent) + : _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(parent->handle(), tensor_shape, + coords, extend_parent)), + _rank{rank} +{ + // DO NOTHING +} + +const arm_compute::SubTensor *NESubTensor::handle() const { return _ne_sub_tensor.get(); } + +arm_compute::SubTensor *NESubTensor::handle() { return _ne_sub_tensor.get(); } + +} // namespace operand +} // namespace acl_neon +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_neon/operand/NESubTensor.h b/runtime/neurun/backend/acl_neon/operand/NESubTensor.h new file mode 100644 index 000000000..010e4deda --- /dev/null +++ b/runtime/neurun/backend/acl_neon/operand/NESubTensor.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__ +#define __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__ + +#include <arm_compute/runtime/SubTensor.h> +#include "INETensor.h" +#include "compiler/SubTensorInfo.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ +namespace operand +{ + +class NESubTensor : public INETensor +{ +public: + NESubTensor() = delete; + +public: + NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape, + const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false); + +public: + size_t num_dimensions() const final { return _rank; } + +public: + const arm_compute::SubTensor *handle() const override; + arm_compute::SubTensor *handle() override; + +public: + // This method is used to prevent the use of memcpy for SubTensor + bool has_padding() const override { return true; } + +private: + std::shared_ptr<arm_compute::SubTensor> _ne_sub_tensor; + size_t _rank; +}; + +} // namespace operand +} // namespace acl_neon +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__ diff --git a/runtime/neurun/backend/acl_neon/operand/NETensor.cc b/runtime/neurun/backend/acl_neon/operand/NETensor.cc new file mode 100644 index 000000000..8a9ece88f --- /dev/null +++ b/runtime/neurun/backend/acl_neon/operand/NETensor.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <arm_compute/runtime/Memory.h> +#include <arm_compute/runtime/MemoryRegion.h> +#include "NETensor.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ +namespace operand +{ + +NETensor::NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses) + : _ne_tensor(std::make_shared<arm_compute::Tensor>()), _rank{rank}, _num_uses{num_uses} +{ + allocator()->init(info); +} + +const arm_compute::Tensor *NETensor::handle() const { return _ne_tensor.get(); } + +arm_compute::Tensor *NETensor::handle() { return _ne_tensor.get(); } + +arm_compute::TensorAllocator *NETensor::allocator() { return _ne_tensor->allocator(); } + +} // namespace operand +} // namespace acl_neon +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/acl_neon/operand/NETensor.h b/runtime/neurun/backend/acl_neon/operand/NETensor.h new file mode 100644 index 000000000..3de4695e9 --- /dev/null +++ b/runtime/neurun/backend/acl_neon/operand/NETensor.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__ +#define __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__ + +#include <arm_compute/core/TensorInfo.h> +#include <arm_compute/runtime/Tensor.h> +#include "arm_compute/runtime/TensorAllocator.h" +#include "INETensor.h" + +namespace neurun +{ +namespace backend +{ +namespace acl_neon +{ +namespace operand +{ + +class NETensor : public INETensor +{ +public: + NETensor() = delete; + +public: + NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses); + +public: + size_t num_dimensions() const final { return _rank; } + +public: + const arm_compute::Tensor *handle() const override; + arm_compute::Tensor *handle() override; + size_t num_uses() const { return _num_uses; } + +public: + arm_compute::TensorAllocator *allocator(); + +private: + std::shared_ptr<arm_compute::Tensor> _ne_tensor; + size_t _rank; + size_t _num_uses; +}; + +} // namespace operand +} // namespace acl_neon +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__ diff --git a/runtime/neurun/backend/cpu/Backend.h b/runtime/neurun/backend/cpu/Backend.h new file mode 100644 index 000000000..e52a776b9 --- /dev/null +++ b/runtime/neurun/backend/cpu/Backend.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_BACKEND_H__ +#define __NEURUN_BACKEND_CPU_BACKEND_H__ + +#include <memory> +#include <backend/Backend.h> +#include <ir/Operands.h> + +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ShapeFixer.h" +#include "TensorRegister.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class Backend : public ::neurun::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<BackendContext> + newContext(const ir::Operands &operands, + const std::shared_ptr<custom::IKernelBuilder> &kb) const override + { + auto tensor_builder = std::make_shared<TensorBuilder>(); + return std::unique_ptr<BackendContext>{new BackendContext{ + this, tensor_builder, std::make_shared<ConstantInitializer>(operands, tensor_builder), + std::make_shared<KernelGenerator>(operands, tensor_builder, kb), + std::make_shared<ShapeFixer>(operands, tensor_builder), + std::make_shared<TensorRegister>(operands, tensor_builder)}}; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_BACKEND_H__ diff --git a/runtime/neurun/backend/cpu/CMakeLists.txt b/runtime/neurun/backend/cpu/CMakeLists.txt new file mode 100644 index 000000000..82c838b15 --- /dev/null +++ b/runtime/neurun/backend/cpu/CMakeLists.txt @@ -0,0 +1,16 @@ +set(LIB_NEURUN_BACKEND_CPU neurun_backend_cpu) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_NEURUN_BACKEND_CPU} SHARED ${SOURCES}) + +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PUBLIC nnfw_lib_cpp14) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE nnfw_lib_misc nnfw_lib_cker) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE neurun_core) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE ${LIB_NEURUN_BACKEND_CPU_COMMON}) 
+target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE nnfw_common) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE nnfw_coverage) + +set_target_properties(${LIB_NEURUN_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu) + +install(TARGETS ${LIB_NEURUN_BACKEND_CPU} DESTINATION lib) diff --git a/runtime/neurun/backend/cpu/Config.cc b/runtime/neurun/backend/cpu/Config.cc new file mode 100644 index 000000000..39127406f --- /dev/null +++ b/runtime/neurun/backend/cpu/Config.cc @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Config.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +bool Config::initialize() { return true; } + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/Config.h b/runtime/neurun/backend/cpu/Config.h new file mode 100644 index 000000000..be303b556 --- /dev/null +++ b/runtime/neurun/backend/cpu/Config.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_CONFIG_H__ +#define __NEURUN_BACKEND_CPU_CONFIG_H__ + +#include <backend/IConfig.h> +#include <cpp14/memory.h> +#include <util/ITimer.h> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "cpu"; } + bool initialize() override; + bool SupportPermutation() override { return true; } + bool SupportSubTensorAlloc() override + { + // NOTE CPU allocator cannot support subtensor allocation yet + return false; + } + + std::unique_ptr<util::ITimer> timer() override + { + return nnfw::cpp14::make_unique<util::CPUTimer>(); + } +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_CONFIG_H__ diff --git a/runtime/neurun/backend/cpu/ConstantInitializer.cc b/runtime/neurun/backend/cpu/ConstantInitializer.cc new file mode 100644 index 000000000..e6e7d8deb --- /dev/null +++ b/runtime/neurun/backend/cpu/ConstantInitializer.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConstantInitializer.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +ConstantInitializer::ConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _operands{operands}, _tensor_builder{tensor_builder} +{ + // DO NOTHING +} + +void ConstantInitializer::visit(const ir::operation::Conv2D &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + registerCopyInitializer(kernel_index, kernel_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + registerCopyInitializer(kernel_index, kernel_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +void ConstantInitializer::visit(const ir::operation::FullyConnected &node) +{ + const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto &weight_obj = _operands.at(weight_index); + registerCopyInitializer(weight_index, weight_obj); + + const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto &bias_obj = _operands.at(bias_index); + registerCopyInitializer(bias_index, bias_obj); +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/ConstantInitializer.h b/runtime/neurun/backend/cpu/ConstantInitializer.h new file mode 
100644 index 000000000..a53321997 --- /dev/null +++ b/runtime/neurun/backend/cpu/ConstantInitializer.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#define __NEURUN_COMPILER_CPU_CONSTANT_INITIALIZER_H__ + +#include <backend/IConstantInitializer.h> +#include <ir/Operands.h> +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class ConstantInitializer : public IConstantInitializer +{ +public: + ConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder); + +public: + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &operands() const override { return _operands; } + std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } + +private: + const ir::Operands &_operands; + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_COMPILER_CPU_CONSTANT_INITIALIZER_H__ diff --git a/runtime/neurun/backend/cpu/KernelGenerator.cc b/runtime/neurun/backend/cpu/KernelGenerator.cc new file mode 100644 index 000000000..09bd1367d --- /dev/null +++ 
b/runtime/neurun/backend/cpu/KernelGenerator.cc @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KernelGenerator.h" + +#include <stdexcept> + +#include "cpp14/memory.h" +#include "util/Padding.h" +#include "kernel/OperationUtils.h" +#include "kernel/ConvolutionLayer.h" +#include "kernel/AvgPoolLayer.h" +#include "kernel/MaxPoolLayer.h" +#include "kernel/ConcatLayer.h" +#include "kernel/FullyConnectedLayer.h" +#include "kernel/ReshapeLayer.h" +#include "kernel/SoftMaxLayer.h" +#include "kernel/PermuteLayer.h" +#include "kernel/DepthwiseConvolutionLayer.h" +#include "kernel/AddLayer.h" +#include "kernel/SubLayer.h" +#include "kernel/MulLayer.h" +#include "kernel/GatherLayer.h" +#include "kernel/LogisticLayer.h" +#include "kernel/PadLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> + +#include "util/logging.h" + +#include "util/Utils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operand_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builer) + : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_builder(kernel_builer), + _current_subg_layout(ir::Layout::UNKNOWN) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + _current_subg_layout 
= op_seq.getLayout(); + for (const auto &e : op_seq.operations()) + { + const auto &node = *(e.node); + _tensor_builder->preVisit(node); + node.accept(*this); + _tensor_builder->postVisit(node); + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const auto activation = node.param().activation; + + const auto ofm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout); + const auto ifm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ifm_index), _current_subg_layout); + const auto ker_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ker_index), ir::Layout::UNKNOWN); + const auto bias_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(bias_index), ir::Layout::UNKNOWN); + + auto ofm_alloc = _tensor_builder->at(ofm_index); + auto ifm_alloc = _tensor_builder->at(ifm_index); + auto ker_alloc = _tensor_builder->at(ker_index); + auto bias_alloc = _tensor_builder->at(bias_index); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ConvolutionLayer>(); + + 
fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr, + bias_alloc->buffer(), bias_backend_descr, padding.left, padding.right, padding.top, + padding.bottom, stride.horizontal, stride.vertical, activation, ofm_alloc->buffer(), + ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) +{ + using ir::operation::DepthwiseConv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; + + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
+ const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + + const auto ofm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout); + const auto ifm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ifm_index), _current_subg_layout); + const auto ker_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ker_index), ir::Layout::UNKNOWN); + const auto bias_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(bias_index), ir::Layout::UNKNOWN); + + const auto multiplier = node.param().multiplier; + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index); + auto ifm_alloc = _tensor_builder->at(ifm_index); + auto ker_alloc = _tensor_builder->at(ker_index); + auto bias_alloc = _tensor_builder->at(bias_index); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::DepthwiseConvolutionLayer>(); + + fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr, + bias_alloc->buffer(), bias_backend_descr, padding.left, padding.right, padding.top, + padding.bottom, stride.horizontal, stride.vertical, multiplier, activation, + ofm_alloc->buffer(), ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::MaxPool2D &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)}; + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = 
_ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + const auto ofm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout); + const auto ifm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ifm_index), _current_subg_layout); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::MaxPoolLayer>(); + + fn->configure(ifm_alloc->buffer(), ifm_backend_descr, padding.left, padding.right, padding.top, + padding.bottom, stride.horizontal, stride.vertical, kw, kh, activation, + ofm_alloc->buffer(), ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::AvgPool2D &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)}; + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + const auto ofm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout); + const auto ifm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ifm_index), _current_subg_layout); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + 
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::AvgPoolLayer>(); + + fn->configure(ifm_alloc->buffer(), ifm_backend_descr, padding.left, padding.right, padding.top, + padding.bottom, stride.horizontal, stride.vertical, kw, kh, activation, + ofm_alloc->buffer(), ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Concat &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + const auto rank = _ctx.at(ofm_index).shape().rank(); + const auto axis = + ::neurun::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_subg_layout); + + const auto ofm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout); + std::vector<::neurun::backend::cpu::kernel::TensorDescriptor> ifm_backend_descrs; + for (auto &in_idx : node.getInputs()) + ifm_backend_descrs.emplace_back( + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(in_idx), _current_subg_layout)); + + auto output_alloc = _tensor_builder->at(ofm_index).get(); + + std::vector<const uint8_t *> input_buffers; + for (auto &ifm_idx : node.getInputs()) + input_buffers.emplace_back(_tensor_builder->at(ifm_idx).get()->buffer()); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ConcatLayer>(); + + fn->configure(input_buffers, ifm_backend_descrs, axis, output_alloc->buffer(), ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + + const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + 
_ctx.at(output_index), _current_subg_layout); + const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(input_index), _current_subg_layout); + const auto weight_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(weight_index), ir::Layout::UNKNOWN); + const auto bias_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(bias_index), ir::Layout::UNKNOWN); + + const auto activation = node.param().activation; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + auto weight_alloc = _tensor_builder->at(weight_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::FullyConnectedLayer>(); + + fn->configure(input_alloc->buffer(), ifm_backend_descr, weight_alloc->buffer(), + weight_backend_descr, bias_alloc->buffer(), bias_backend_descr, activation, + output_alloc->buffer(), ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Reshape &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; + + const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(output_index), _current_subg_layout); + const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(input_index), _current_subg_layout); + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ReshapeLayer>(); + + fn->configure(input_alloc->buffer(), ifm_backend_descr, output_alloc->buffer(), + ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const 
ir::operation::Squeeze &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; + + const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(output_index), _current_subg_layout); + const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(input_index), _current_subg_layout); + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + // Squeeze can share same kernel with reshape + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ReshapeLayer>(); + + fn->configure(input_alloc->buffer(), ifm_backend_descr, output_alloc->buffer(), + ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Softmax &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)}; + + const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(output_index), _current_subg_layout); + const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(input_index), _current_subg_layout); + + const auto beta = node.param().beta; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::SoftMaxLayer>(); + + fn->configure(input_alloc->buffer(), ifm_backend_descr, beta, output_alloc->buffer(), + ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Add &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; + const auto 
rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)}; + + const auto ofm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout); + const auto lhs_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(lhs_index), _current_subg_layout); + const auto rhs_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(rhs_index), _current_subg_layout); + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::AddLayer>(); + + fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr, + activation, ofm_alloc->buffer(), ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Gather &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; + + const auto output_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(output_index), _current_subg_layout); + const auto input_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(input_index), _current_subg_layout); + const auto indices_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(indices_index), _current_subg_layout); + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + auto indices_alloc = _tensor_builder->at(indices_index).get(); + + const auto backend_layout = output_alloc->layout(); + UNUSED_RELEASE(backend_layout); + + // NOTE The frontend layout and backend 
layout must be the same for this operation. + // If not the same, we have to add a stage(?) to perform permutation of output tensor. It + // is not not efficient even if it works well. If so, it would be better to set the + // layout of these backend tensors to the same layout. + // There is also one thing we have to think about. This operation depends on the layout of + // a model. For example, if a model in NHWC has this operation as output rank == 4, indices + // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W + // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. + assert(backend_layout == input_alloc->layout()); + assert(backend_layout == indices_alloc->layout()); + const auto &input_shape = _ctx.at(input_index).shape(); + UNUSED_RELEASE(input_shape); + assert(input_shape.rank() < 4 || _current_subg_layout == backend_layout); + + const auto axis_raw = node.param().axis; + const auto axis_value = (axis_raw < 0 ? 
(input_shape.rank() + axis_raw) : axis_raw); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::GatherLayer>(); + + fn->configure(input_alloc->buffer(), input_backend_descr, indices_alloc->buffer(), + indices_backend_descr, output_alloc->buffer(), output_backend_descr, axis_value); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Sub &node) +{ + // The same as Add + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; + + const auto ofm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout); + const auto lhs_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(lhs_index), _current_subg_layout); + const auto rhs_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(rhs_index), _current_subg_layout); + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::SubLayer>(); + + fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr, + activation, ofm_alloc->buffer(), ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Mul &node) +{ + // The same as Add + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; + + const auto ofm_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout); + const auto 
lhs_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(lhs_index), _current_subg_layout); + const auto rhs_backend_descr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(rhs_index), _current_subg_layout); + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::MulLayer>(); + + fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr, + activation, ofm_alloc->buffer(), ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Permute &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + const auto &shape = _ctx.at(output_index).shape(); + const auto input_backend_ctx = node.param().input_backend_ctx; + const auto output_backend_ctx = node.param().output_backend_ctx; + const auto data_type = node.getDataType(); + + output_backend_ctx->tensor_builder->preVisit(node); + + auto output_object = output_backend_ctx->tensor_builder->tensorAt(output_index); + auto input_object = input_backend_ctx->tensor_builder->tensorAt(input_index); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::PermuteLayer>(); + + // TODO Support NCHW frontend + auto out_shape = shape; + if (shape.rank() == 4 && output_object->layout() == ir::Layout::NCHW) + { + out_shape.dim(1) = shape.dim(3); + out_shape.dim(2) = shape.dim(1); + out_shape.dim(3) = shape.dim(2); + } + + const auto permute_type = node.getPermuteType(); + // Check Permutation Type + const auto inferPermuteType = [&]() { + if (input_object->layout() == ir::Layout::NHWC && output_object->layout() == ir::Layout::NCHW) + { + return ir::operation::Permute::Type::NHWC_TO_NCHW; + } + 
else if (input_object->layout() == ir::Layout::NCHW && + output_object->layout() == ir::Layout::NHWC) + { + return ir::operation::Permute::Type::NCHW_TO_NHWC; + } + else + { + return ir::operation::Permute::Type::COPY; + } + }(); + UNUSED_RELEASE(inferPermuteType); + assert(permute_type == inferPermuteType); + + fn->configure(input_object, output_object, out_shape, permute_type, data_type); + + input_backend_ctx->tensor_builder->postVisit(node); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Custom &node) +{ + auto get_type_info = [this](const ir::Operand &operand) -> custom::TypeInfo { + auto backendDescr = + ::neurun::backend::cpu::kernel::getTensorDescriptor(operand, _current_subg_layout); + + custom::Shape shape(backendDescr.dimensions.size()); + for (size_t d = 0; d < backendDescr.dimensions.size(); ++d) + { + shape.dim(d) = backendDescr.dimensions[d]; + } + + return {shape, backendDescr.type}; + }; + + auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq, + std::vector<custom::TypeInfo> &types, std::vector<void *> &allocs) { + for (auto &idx : opSeq) + { + const auto &operand = _ctx.at(idx); + // TODO make sure using `_current_subg_layout` is correct for custom operations + types.emplace_back(get_type_info(operand)); + auto in_alloc = _tensor_builder->at(idx)->buffer(); + allocs.emplace_back(in_alloc); + } + }; + + backend::custom::CustomKernelConfigParams params{}; + + fill_op_info(node.getInputs(), params.input_types, params.input_allocations); + fill_op_info(node.getOutputs(), params.output_types, params.output_allocations); + + params.userdata = node.userdata().data; + params.userdata_size = node.userdata().size; + + auto fn = _kernel_builder->buildKernel(node.id(), std::move(params)); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Logistic &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto 
input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; + + const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(output_index), _current_subg_layout); + const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(input_index), _current_subg_layout); + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::LogisticLayer>(); + + fn->configure(input_alloc->buffer(), ifm_backend_descr, output_alloc->buffer(), + ofm_backend_descr); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Pad &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; + const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)}; + const auto output_index{node.getOutputs().at(0)}; + assert(_ctx.at(pad_index).isConstant()); + + auto input = _tensor_builder->at(input_index).get(); + auto output = _tensor_builder->at(output_index).get(); + auto pad_rank = _ctx.at(pad_index).shape().dim(0); + auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data().base()); + const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(output_index), _current_subg_layout); + const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor( + _ctx.at(input_index), _current_subg_layout); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::PadLayer>(); + + fn->configure(input->buffer(), ifm_backend_descr, output->buffer(), ofm_backend_descr, pad_base, + pad_rank); + + _execution_builder->append(std::move(fn)); +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/KernelGenerator.h b/runtime/neurun/backend/cpu/KernelGenerator.h new file mode 100644 
index 000000000..0083d6791 --- /dev/null +++ b/runtime/neurun/backend/cpu/KernelGenerator.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_GENERATOR_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_GENERATOR_H__ + +#include "backend/IKernelGenerator.h" +#include "ir/Operands.h" +#include "operand/Tensor.h" +#include "backend/CustomKernelBuilder.h" +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class KernelGenerator : public IKernelGenerator +{ +public: + KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<custom::IKernelBuilder> &kernel_builder); + + using IKernelGenerator::visit; + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::MaxPool2D &) override; + void visit(const ir::operation::AvgPool2D &) override; + void visit(const ir::operation::Concat &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Softmax &) override; + void visit(const ir::operation::Add &) override; + void visit(const 
ir::operation::Sub &) override; + void visit(const ir::operation::Mul &) override; + void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::Custom &node) override; + void visit(const ir::operation::Logistic &) override; + void visit(const ir::operation::Pad &) override; + +private: + const ir::Operands &_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; + ir::Layout _current_subg_layout; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_GENERATOR_H__ diff --git a/runtime/neurun/backend/cpu/MemoryManager.cc b/runtime/neurun/backend/cpu/MemoryManager.cc new file mode 100644 index 000000000..926d8fb4c --- /dev/null +++ b/runtime/neurun/backend/cpu/MemoryManager.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "MemoryManager.h" + +#include <cassert> + +#include <MemoryPlannerFactory.h> +#include "util/logging.h" +#include "util/ConfigSource.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()} +{ + // DO NOTHING +} + +MemoryManager::MemoryManager(const std::string planner_id) + : _mem_planner{createMemoryPlanner(planner_id)} +{ + // DO NOTHING +} + +cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner() +{ + auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER); + return cpu_common::MemoryPlannerFactory::get().create(planner_id); +} + +cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id) +{ + return cpu_common::MemoryPlannerFactory::get().create(planner_id); +} + +void MemoryManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info) +{ + auto tensor = std::make_shared<operand::Tensor>(info); + _tensors[ind] = tensor; +} + +void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) +{ + _mem_planner->claim(ind, size); +} + +void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); } + +void MemoryManager::allocate(void) +{ + _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity()); + assert(_mem_alloc->base()); + + for (auto &mem_plan : _mem_planner->memory_plans()) + { + auto ind = mem_plan.first; + auto mem_blk = mem_plan.second; + + uint8_t *buffer = _mem_alloc->base() + mem_blk.offset; + auto tensor = _tensors[ind]; + tensor->setBuffer(buffer); + + VERBOSE(CPU_MEMORYMANAGER) << "TENSOR(#" << ind.value() << "): " << static_cast<void *>(buffer) + << std::endl; + + // If we do not make tensor here currently, kernel generation would cause segmentation fault. + // See also : Comments in `allocate` method. 
+ } +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/MemoryManager.h b/runtime/neurun/backend/cpu/MemoryManager.h new file mode 100644 index 000000000..267e8001b --- /dev/null +++ b/runtime/neurun/backend/cpu/MemoryManager.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_MEMORY_MANAGER_H__ +#define __NEURUN_BACKEND_CPU_MEMORY_MANAGER_H__ + +#include "backend/IMemoryManager.h" +#include <MemoryPlanner.h> +#include "operand/Tensor.h" +#include "ir/OperandIndexMap.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class MemoryManager : public backend::IMemoryManager +{ +public: + MemoryManager(); + MemoryManager(const std::string); + virtual ~MemoryManager() = default; + + void allocate(void) override; + void deallocate(void) override { _mem_alloc->release(); } + + void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info); + void claimPlan(const ir::OperandIndex &ind, uint32_t size); + void releasePlan(const ir::OperandIndex &ind); + + ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &tensors(void) { return _tensors; } + +private: + cpu_common::IMemoryPlanner *createMemoryPlanner(); + cpu_common::IMemoryPlanner *createMemoryPlanner(const std::string); + +private: + ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> _tensors; + 
ir::OperandIndexMap<cpu_common::Block> _tensor_mem_map; + std::shared_ptr<cpu_common::IMemoryPlanner> _mem_planner; + std::shared_ptr<cpu_common::Allocator> _mem_alloc; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_MEMORY_MANAGER_H__ diff --git a/runtime/neurun/backend/cpu/PluginClassesAllocator.cc b/runtime/neurun/backend/cpu/PluginClassesAllocator.cc new file mode 100644 index 000000000..caba78447 --- /dev/null +++ b/runtime/neurun/backend/cpu/PluginClassesAllocator.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <util/logging.h> + +#include "Backend.h" + +extern "C" { +neurun::backend::Backend *neurun_backend_create() +{ + VERBOSE(neurun_backend_create) << "'cpu' loaded\n"; + return new neurun::backend::cpu::Backend; +} + +void neurun_backend_destroy(neurun::backend::Backend *backend) +{ + VERBOSE(neurun_backend_destroy) << "'cpu' unloaded\n"; + delete backend; +} +} diff --git a/runtime/neurun/backend/cpu/ShapeFixer.cc b/runtime/neurun/backend/cpu/ShapeFixer.cc new file mode 100644 index 000000000..835592b30 --- /dev/null +++ b/runtime/neurun/backend/cpu/ShapeFixer.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ShapeFixer.h" + +#include <stdexcept> + +#include "cpp14/memory.h" +#include "util/Padding.h" +#include "kernel/OperationUtils.h" +#include "kernel/ConvolutionLayer.h" +#include "kernel/AvgPoolLayer.h" +#include "kernel/MaxPoolLayer.h" +#include "kernel/ConcatLayer.h" +#include "kernel/FullyConnectedLayer.h" +#include "kernel/ReshapeLayer.h" +#include "kernel/SoftMaxLayer.h" +#include "kernel/PermuteLayer.h" +#include "kernel/DepthwiseConvolutionLayer.h" +#include "kernel/AddLayer.h" +#include "kernel/SubLayer.h" +#include "kernel/MulLayer.h" +#include "kernel/GatherLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include "compiler/IExecutionBuilder.h" + +#include "util/logging.h" + +#include "util/Utils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +ShapeFixer::ShapeFixer(const ir::Operands &operand_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _ctx(operand_ctx), _tensor_builder(tensor_builder) +{ + assert(tensor_builder); +} + +void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Concat &) { /* DO 
NOTHING */} + +void ShapeFixer::visit(const ir::operation::FullyConnected &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Reshape &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Squeeze &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Gather &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Add &node) +{ + const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; + + // Quantization : not supported + if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM) + { + throw std::runtime_error{"ShapeFixer: NYI for quantized Add"}; + } +} + +void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Sub &node) +{ + // The same as Add + const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; + + // Quantization : not supported + if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM) + { + throw std::runtime_error{"ShapeFixer: NYI for quantized Sub"}; + } +} + +void ShapeFixer::visit(const ir::operation::Mul &node) +{ + // The same as Add + const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; + + // Quantization : not supported + if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM) + { + throw std::runtime_error{"ShapeFixer: NYI for quantized Mul"}; + } +} + +void ShapeFixer::visit(const ir::operation::Custom &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Pad &node) +{ + // TODO: empty this method when quantization is supported + const auto lhs_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; + + // Quantization : not supported + if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM) + { + throw 
std::runtime_error{"ShapeFixer: NYI for quantized Pad"}; + } +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/ShapeFixer.h b/runtime/neurun/backend/cpu/ShapeFixer.h new file mode 100644 index 000000000..bbf48498e --- /dev/null +++ b/runtime/neurun/backend/cpu/ShapeFixer.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_SHAPE_FIXER_H__ +#define __NEURUN_BACKEND_CPU_SHAPE_FIXER_H__ + +#include <backend/IShapeFixer.h> + +#include "ir/Operands.h" +#include "operand/Tensor.h" +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class ShapeFixer : public IShapeFixer +{ +public: + ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder); + + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::MaxPool2D &) override; + void visit(const ir::operation::AvgPool2D &) override; + void visit(const ir::operation::Concat &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Softmax &) override; + void visit(const ir::operation::Add &) override; + void 
visit(const ir::operation::Gather &) override; + void visit(const ir::operation::Sub &) override; + void visit(const ir::operation::Mul &) override; + void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Custom &) override; + void visit(const ir::operation::Logistic &) override; + void visit(const ir::operation::Pad &) override; + +private: + const ir::Operands &_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_SHAPE_FIXER_H__ diff --git a/runtime/neurun/backend/cpu/TensorBuilder.cc b/runtime/neurun/backend/cpu/TensorBuilder.cc new file mode 100644 index 000000000..2c654c256 --- /dev/null +++ b/runtime/neurun/backend/cpu/TensorBuilder.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TensorBuilder.h" + +#include <cassert> + +#include "util/logging.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +TensorBuilder::TensorBuilder() : _tensor_mgr{new TensorManager()} +{ + // DO NOTHING +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout, bool as_const) +{ + _tensor_info_map.emplace(ind, info); + + if (as_const) + _constants.append(ind); +} + +void TensorBuilder::registerSubTensorInfo(const ir::OperandIndex &, const compiler::SubTensorInfo &) +{ + // Not supported yet + assert(false); +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto tensor_info = _tensor_info_map.at(ind); + const auto size = tensor_info.total_size(); + _tensor_mgr->buildTensor(ind, tensor_info, _constants.contains(ind)); + _tensor_mgr->claimPlan(ind, size); +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { _tensor_mgr->releasePlan(ind); } + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) +{ + _tensor_mgr->allocateConsts(); + _tensor_mgr->allocateNonconsts(); +} + +void TensorBuilder::allocateConsts() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. +} + +void TensorBuilder::allocateNonconsts() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. 
+} + +std::shared_ptr<::neurun::backend::operand::ITensor> +TensorBuilder::tensorAt(const ir::OperandIndex &ind) +{ + return _tensor_mgr->at(ind); +} + +void TensorBuilder::iterate(const IterateFunction &fn) { _tensor_mgr->iterate(fn); } + +std::shared_ptr<operand::Tensor> TensorBuilder::at(const ir::OperandIndex &ind) +{ + return _tensor_mgr->at(ind); +} + +std::unique_ptr<ITensorManager> TensorBuilder::releaseTensorManager(void) +{ + return std::move(_tensor_mgr); +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/TensorBuilder.h b/runtime/neurun/backend/cpu/TensorBuilder.h new file mode 100644 index 000000000..a08db8c9a --- /dev/null +++ b/runtime/neurun/backend/cpu/TensorBuilder.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_TENSOR_BUILDER_H__ +#define __NEURUN_BACKEND_CPU_TENSOR_BUILDER_H__ + +#include <unordered_map> + +#include <backend/ITensorBuilder.h> +#include "operand/Tensor.h" +#include "ir/OperandIndexMap.h" +#include "TensorManager.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class TensorBuilder : public ITensorBuilder +{ +public: + TensorBuilder(); + + /** + * @brief Register tensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout, bool as_const) override; + /** + * @brief Register subtensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Tensor information + */ + void registerSubTensorInfo(const ir::OperandIndex &ind, + const compiler::SubTensorInfo &info) override; + + void notifyFirstUse(const ir::OperandIndex &) override; + void notifyLastUse(const ir::OperandIndex &) override; + + bool isRegistered(const ir::OperandIndex &) const override; + + void prepare(void) override; + void allocateConsts() override; + void allocateNonconsts() override; + void postFunctionPrepare() override { /* DO NOTHING */} + void finalize() override { /* DO NOTHING */} + + std::shared_ptr<::neurun::backend::operand::ITensor> + tensorAt(const ir::OperandIndex &ind) override; + + void iterate(const IterateFunction &fn) override; + + void preVisit(const ir::Operation &) override { /* DO NOTHING */} + void postVisit(const ir::Operation &) override { /* DO NOTHING */} + + std::unique_ptr<ITensorManager> releaseTensorManager(void) override; + + std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind); + +private: + std::unique_ptr<TensorManager> _tensor_mgr; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; + ir::OperandIndexSequence _constants; +}; + +} // 
namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_TENSOR_BUILDER_H__ diff --git a/runtime/neurun/backend/cpu/TensorManager.cc b/runtime/neurun/backend/cpu/TensorManager.cc new file mode 100644 index 000000000..a346e2cb4 --- /dev/null +++ b/runtime/neurun/backend/cpu/TensorManager.cc @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TensorManager.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +TensorManager::TensorManager() + : _const_mgr{new MemoryManager("Bump")}, _nonconst_mgr{new MemoryManager()} +{ + // DO NOTHING +} + +void TensorManager::allocateConsts(void) { _const_mgr->allocate(); } + +void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); } + +void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); } + +void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } + +void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, + bool as_const) +{ + assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end()); + if (as_const) + { + _const_mgr->buildTensor(ind, tensor_info); + _ind_to_mgr.insert({ind, *_const_mgr}); + } + else + { + _nonconst_mgr->buildTensor(ind, tensor_info); + _ind_to_mgr.insert({ind, *_nonconst_mgr}); + } +} + +void TensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t 
size) +{ + assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); + _ind_to_mgr.at(ind).claimPlan(ind, size); +} + +void TensorManager::releasePlan(const ir::OperandIndex &ind) +{ + assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); + _ind_to_mgr.at(ind).releasePlan(ind); +} + +std::shared_ptr<operand::Tensor> TensorManager::at(const ir::OperandIndex &ind) +{ + assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); + return _ind_to_mgr.at(ind).tensors().at(ind); +} + +ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &TensorManager::constTensors(void) +{ + return _const_mgr->tensors(); +} + +ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &TensorManager::nonconstTensors(void) +{ + return _nonconst_mgr->tensors(); +} + +void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn) +{ + for (auto it : _nonconst_mgr->tensors()) + fn(it.first); + + for (auto it : _const_mgr->tensors()) + fn(it.first); +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/TensorManager.h b/runtime/neurun/backend/cpu/TensorManager.h new file mode 100644 index 000000000..c3ef70663 --- /dev/null +++ b/runtime/neurun/backend/cpu/TensorManager.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_TENSOR_MANAGER_H__ +#define __NEURUN_BACKEND_CPU_TENSOR_MANAGER_H__ + +#include "backend/ITensorManager.h" +#include "MemoryManager.h" +#include "ir/OperandIndexMap.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class TensorManager : public backend::ITensorManager +{ +public: + TensorManager(); + virtual ~TensorManager() = default; + + void allocateConsts(void) override; + void allocateNonconsts(void) override; + void deallocateConsts(void) override; + void deallocateNonconsts(void) override; + + void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, bool as_const); + + void claimPlan(const ir::OperandIndex &ind, uint32_t size); + void releasePlan(const ir::OperandIndex &ind); + + std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind); + + ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &constTensors(void); + ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &nonconstTensors(void); + + void iterate(const std::function<void(const ir::OperandIndex &)> &fn); + +private: + std::unique_ptr<MemoryManager> _const_mgr; + std::unique_ptr<MemoryManager> _nonconst_mgr; + ir::OperandIndexMap<MemoryManager &> _ind_to_mgr; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_TENSOR_MANAGER_H__ diff --git a/runtime/neurun/backend/cpu/TensorRegister.cc b/runtime/neurun/backend/cpu/TensorRegister.cc new file mode 100644 index 000000000..2701503f5 --- /dev/null +++ b/runtime/neurun/backend/cpu/TensorRegister.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TensorRegister.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +TensorRegister::TensorRegister(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _operands{operands}, _tensor_builder{tensor_builder} +{ + assert(tensor_builder != nullptr); +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/TensorRegister.h b/runtime/neurun/backend/cpu/TensorRegister.h new file mode 100644 index 000000000..1bda9fca3 --- /dev/null +++ b/runtime/neurun/backend/cpu/TensorRegister.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_TENSOR_REGISTER_H__ +#define __NEURUN_BACKEND_CPU_TENSOR_REGISTER_H__ + +#include <backend/ITensorRegister.h> +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +class TensorRegister : public ITensorRegister +{ +public: + TensorRegister(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder); + +private: + const ir::Operands &operands() const override { return _operands; } + std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } + bool supportSubTensor() const final { return false; } + +private: + const ir::Operands &_operands; + const std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_TENSOR_REGISTER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.cc b/runtime/neurun/backend/cpu/kernel/AddLayer.cc new file mode 100644 index 000000000..8a2d872e5 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/AddLayer.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "AddLayer.h" + +#include <cker/operation/BinaryArithmeticOps.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +void AddLayer::addFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam op_params; + op_params.float_activation_max = output_activation_max; + op_params.float_activation_min = output_activation_min; + + const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) { + return a + b; + }; + + if (!HaveSameShapes(&_lhsDescr, &_rhsDescr)) + { + nnfw::cker::BroadcastBinaryArithmeticOpSlow( + op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f, + convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr), + _outputData.f, fn); + return; + } + + nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr), + _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr), + _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.f, fn); +} + +void AddLayer::addQuant8() +{ + int32_t output_activation_min, output_activation_max; + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + // nnfw::cker::BinaryArithmeticOpParam op_params; + // op_params.quantized_activation_max = output_activation_max; + // op_params.quantized_activation_min = output_activation_min; + + // cker quant8 add is not implemented yet + throw std::runtime_error{"NYI"}; +} + +void AddLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr) +{ + _lhsData.u8 = lhsData; + _lhsDescr = lhsDescr; + _rhsData.u8 = rhsData; + _rhsDescr = rhsDescr; + 
_inputType = lhsDescr.type; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void AddLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + addFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + addQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.h b/runtime/neurun/backend/cpu/kernel/AddLayer.h new file mode 100644 index 000000000..7018e4c48 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/AddLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class AddLayer : public ::neurun::exec::IFunction +{ +public: + AddLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr() + { + // DO NOTHING + } + +public: + void addFloat32(); + + void addQuant8(); + + void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _lhsData; + DataPtr _rhsData; + DataPtr _outputData; + + TensorDescriptor _lhsDescr; + TensorDescriptor _rhsDescr; + TensorDescriptor _outputDescr; + + ir::Activation _activation{ir::Activation::NONE}; + + OperandType _inputType{OperandType::FLOAT32}; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc new file mode 100644 index 000000000..389955796 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "AvgPoolLayer.h"

#include "OperationUtils.h"

#include <cker/operation/AveragePool.h>

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{

// Shared stride/filter/padding setup for both the float32 and quant8 paths.
// NOTE(review): paddings are narrowed to int8_t here, which would truncate
// values above 127 -- presumably fine for supported models, but confirm
// against the field types of nnfw::cker::PoolParams::padding_values.
#define AVGPOOLING_PARAMETERS                            \
  nnfw::cker::PoolParams op_params;                      \
  op_params.stride_height = _strideHeight;               \
  op_params.stride_width = _strideWidth;                 \
  op_params.filter_height = _kernelHeight;               \
  op_params.filter_width = _kernelWidth;                 \
  op_params.padding_values.height = (int8_t)_paddingTop; \
  op_params.padding_values.width = (int8_t)_paddingLeft;

AvgPoolLayer::AvgPoolLayer()
    : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _paddingLeft(0), _paddingTop(0),
      _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0),
      _kernelHeight(0), _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32)
{
  // DO NOTHING
}

// Float32 average pooling; the fused activation becomes a clamp range passed
// to the cker kernel.
void AvgPoolLayer::averagePoolFloat32()
{
  AVGPOOLING_PARAMETERS
  float output_activation_min, output_activation_max;
  CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  nnfw::cker::AveragePool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
                          convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
}

// Asymmetric-quantized uint8 average pooling; the clamp range is derived from
// the activation and the output descriptor.
void AvgPoolLayer::averagePoolQuant8()
{
  AVGPOOLING_PARAMETERS
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
                                &output_activation_max);
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::AveragePool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8,
                          convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
}

// Stores borrowed operand pointers, descriptors and pooling parameters.
// The input type recorded from inputDescr selects the path taken by run().
void AvgPoolLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
                             const uint32_t paddingLeft, const uint32_t paddingRight,
                             const uint32_t paddingTop, const uint32_t paddingBottom,
                             const uint32_t strideWidth, const uint32_t strideHeight,
                             const uint32_t kernelWidth, const uint32_t kernelHeight,
                             const ir::Activation activation, uint8_t *outputData,
                             const TensorDescriptor outputDescr)
{
  _inputData.u8 = inputData;
  _inputDescr = inputDescr;
  _inputType = inputDescr.type;
  _paddingLeft = paddingLeft;
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _kernelWidth = kernelWidth;
  _kernelHeight = kernelHeight;
  _activation = activation;
  _outputData.u8 = outputData;
  _outputDescr = outputDescr;
}

// Dispatches on the configured operand type; any other type is silently a
// no-op.
void AvgPoolLayer::run()
{
  if (_inputType == OperandType::FLOAT32)
  {
    averagePoolFloat32();
  }
  else if (_inputType == OperandType::QUANT8_ASYMM)
  {
    averagePoolQuant8();
  }
}

#undef AVGPOOLING_PARAMETERS

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
#define __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__

#include <exec/IFunction.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{

// CPU backend kernel computing 2D average pooling for FLOAT32 and
// QUANT8_ASYMM operands.
class AvgPoolLayer : public ::neurun::exec::IFunction
{
public:
  AvgPoolLayer();

public:
  void averagePoolFloat32();

  void averagePoolQuant8();

  // Records borrowed operand pointers, descriptors, padding/stride/kernel
  // parameters and the fused activation used by run().
  void configure(uint8_t *inputData, const TensorDescriptor inputDescr, const uint32_t paddingLeft,
                 const uint32_t paddingRight, const uint32_t paddingTop,
                 const uint32_t paddingBottom, const uint32_t strideWidth,
                 const uint32_t strideHeight, const uint32_t kernelWidth,
                 const uint32_t kernelHeight, const ir::Activation activation, uint8_t *outputData,
                 const TensorDescriptor outputDescr);

  void run();
  void runSync()
  {
    // this abstract method is used just for profiling and called for
    // backend::acl_common::AclFunction
    run();
  }

private:
  DataPtr _inputData;
  DataPtr _outputData;

  TensorDescriptor _inputDescr;
  TensorDescriptor _outputDescr;

  uint32_t _paddingLeft;
  uint32_t _paddingTop;
  uint32_t _paddingRight;
  uint32_t _paddingBottom;

  uint32_t _strideWidth;
  uint32_t _strideHeight;
  uint32_t _kernelWidth;
  uint32_t _kernelHeight;

  ir::Activation _activation;

  OperandType _inputType;
};

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ConcatLayer.h"

#include "OperationUtils.h"

#include <cker/operation/Concatenation.h>

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{

ConcatLayer::ConcatLayer()
    : _inputDataPtrs(), _outputData(), _axis(0), _inputDescriptors(), _outputDescr(),
      _inputType(OperandType::FLOAT32)
{
  // DO NOTHING
}

// Concatenates the configured float32 inputs along _axis into the output.
void ConcatLayer::concatenationFloat32()
{
  uint32_t num_inputs = _inputDescriptors.size();

  nnfw::cker::ConcatenationParams op_params;
  op_params.axis = _axis;
  op_params.inputs_count = num_inputs;

  std::vector<nnfw::cker::Shape *> inputDimsPtr;
  std::vector<nnfw::cker::Shape> inputDims;
  // reserve() up front so the pointers taken below stay valid (no
  // reallocation while pushing).
  inputDimsPtr.reserve(num_inputs);
  inputDims.reserve(num_inputs);

  for (uint32_t i = 0; i < num_inputs; i++)
  {
    inputDims.push_back(convertTensorDescriptorToCkerShape(_inputDescriptors[i]));
    inputDimsPtr.push_back(&inputDims[i]);
  }

  std::vector<const float *> inputFloatPtrs;

  for (auto ptr : _inputDataPtrs)
  {
    inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
  }

  nnfw::cker::Concatenation<float>(op_params, inputDimsPtr.data(), inputFloatPtrs.data(),
                                   convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
}

// Concatenates the configured quantized-uint8 inputs; per-input zero-points
// and scales are forwarded so cker can requantize to the output scale.
void ConcatLayer::concatenationQuant8()
{
  uint32_t num_inputs = _inputDescriptors.size();

  std::vector<int32_t> input_zeropoints(num_inputs);
  std::vector<float> input_scales(num_inputs);
  for (uint32_t i = 0; i < num_inputs; i++)
  {
    input_zeropoints[i] = _inputDescriptors[i].offset;
    input_scales[i] = _inputDescriptors[i].scale;
  }

  nnfw::cker::ConcatenationParams op_params;
  op_params.axis = _axis;
  op_params.inputs_count = num_inputs;
  op_params.input_zeropoint = input_zeropoints.data();
  op_params.input_scale = input_scales.data();
  op_params.output_zeropoint = _outputDescr.offset;
  op_params.output_scale = _outputDescr.scale;

  std::vector<nnfw::cker::Shape *> inputDimsPtr;
  std::vector<nnfw::cker::Shape> inputDims;
  // reserve() keeps the &inputDims[i] pointers below valid.
  inputDimsPtr.reserve(num_inputs);
  inputDims.reserve(num_inputs);
  for (uint32_t i = 0; i < num_inputs; i++)
  {
    inputDims.push_back(convertTensorDescriptorToCkerShape(_inputDescriptors[i]));
    inputDimsPtr.push_back(&inputDims[i]);
  }

  nnfw::cker::Concatenation<uint8_t>(op_params, inputDimsPtr.data(), _inputDataPtrs.data(),
                                     convertTensorDescriptorToCkerShape(_outputDescr),
                                     _outputData.u8);
}

// Records borrowed input/output pointers, descriptors and the concat axis.
// NOTE(review): _inputType is overwritten each iteration, so it ends up as
// the type of the LAST input -- presumably all inputs share one type; verify
// against the caller.
// NOTE(review): _inputDescriptors is appended to, not reset, so calling
// configure() twice would accumulate descriptors.
void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs,
                            const std::vector<TensorDescriptor> &inputDescriptors, int32_t axis,
                            uint8_t *outputData, const TensorDescriptor outputDescr)
{
  _inputDataPtrs = inputDataPtrs;

  for (auto inputDescr : inputDescriptors)
  {
    _inputDescriptors.emplace_back(inputDescr);
    _inputType = inputDescr.type;
  }

  _axis = axis;

  _outputData.u8 = outputData;
  _outputDescr = outputDescr;
}

// Dispatches on the configured operand type; other types are silently a
// no-op.
void ConcatLayer::run()
{
  if (_inputType == OperandType::FLOAT32)
  {
    concatenationFloat32();
  }
  else if (_inputType == OperandType::QUANT8_ASYMM)
  {
    concatenationQuant8();
  }
}

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__
#define __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__

#include <exec/IFunction.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{

// CPU backend kernel concatenating N input tensors along one axis, for
// FLOAT32 and QUANT8_ASYMM operands.
class ConcatLayer : public ::neurun::exec::IFunction
{
public:
  ConcatLayer();

public:
  void concatenationFloat32();

  void concatenationQuant8();

  // Records borrowed input/output pointers, per-input descriptors and the
  // concatenation axis used by run().
  void configure(const std::vector<const uint8_t *> &inputDataPtrs,
                 const std::vector<TensorDescriptor> &inputDescriptors, int32_t axis,
                 uint8_t *outputData, const TensorDescriptor outputDescr);

  void run();
  void runSync()
  {
    // this abstract method is used just for profiling and called for
    // backend::acl_common::AclFunction
    run();
  }

private:
  std::vector<const uint8_t *> _inputDataPtrs;
  DataPtr _outputData;

  int32_t _axis;

  std::vector<TensorDescriptor> _inputDescriptors;
  TensorDescriptor _outputDescr;

  OperandType _inputType;
};

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ConvolutionLayer.h"

#include <cker/operation/Conv.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{
ConvolutionLayer::ConvolutionLayer()
    : _inputData(), _kernelData(), _outputData(), _biasData(), _inputDescr(), _kernelDescr(),
      _outputDescr(), _biasDescr(), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
      _inputType(OperandType::FLOAT32)
{
  // DO NOTHING
}

// Float32 2D convolution (dilation fixed to 1); the fused activation becomes
// a clamp range passed to cker.
void ConvolutionLayer::convFloat32()
{
  float output_activation_min, output_activation_max;
  CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::ConvParams op_params;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = 1;
  op_params.dilation_height_factor = 1;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  nnfw::cker::Conv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
                   convertTensorDescriptorToCkerShape(_kernelDescr), _kernelData.f,
                   convertTensorDescriptorToCkerShape(_biasDescr), _biasData.f,
                   convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
}

// Quantized-uint8 2D convolution. Input/weight offsets are passed negated;
// the output requantization multiplier/shift are derived from the operand
// scales. Bias is read as int32 (_biasData.i32).
void ConvolutionLayer::convQuant8()
{
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
                                &output_activation_max);

  float real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  GetQuantizedConvolutionMultiplier(_inputDescr, _kernelDescr, _biasDescr, _outputDescr,
                                    &real_multiplier);
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  nnfw::cker::ConvParams op_params;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = 1;
  op_params.dilation_height_factor = 1;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.input_offset = -_inputDescr.offset;
  op_params.weights_offset = -_kernelDescr.offset;
  op_params.output_offset = _outputDescr.offset;
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::Conv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8,
                   convertTensorDescriptorToCkerShape(_kernelDescr), _kernelData.u8,
                   convertTensorDescriptorToCkerShape(_biasDescr), _biasData.i32,
                   convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
}

// Records borrowed operand pointers, descriptors and convolution parameters.
// The input type recorded from inputDescr selects the path taken by run().
void ConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
                                 uint8_t *kernelData, const TensorDescriptor kernelDescr,
                                 uint8_t *biasData, const TensorDescriptor biasDescr,
                                 const uint32_t paddingLeft, const uint32_t paddingRight,
                                 const uint32_t paddingTop, const uint32_t paddingBottom,
                                 const uint32_t strideWidth, const uint32_t strideHeight,
                                 const ir::Activation activation, uint8_t *outputData,
                                 const TensorDescriptor outputDescr)
{
  _inputData.u8 = inputData;
  _inputDescr = inputDescr;
  _inputType = inputDescr.type;
  _kernelData.u8 = kernelData;
  _kernelDescr = kernelDescr;
  _biasData.u8 = biasData;
  _biasDescr = biasDescr;
  _paddingLeft = paddingLeft;
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _activation = activation;
  _outputData.u8 = outputData;
  _outputDescr = outputDescr;
}

// Dispatches on the configured operand type; other types are silently a
// no-op.
void ConvolutionLayer::run()
{
  if (_inputType == OperandType::FLOAT32)
  {
    convFloat32();
  }
  else if (_inputType == OperandType::QUANT8_ASYMM)
  {
    convQuant8();
  }
}

// NOTE(review): ANDROID_NN_CONV_PARAMETERS is never defined in this file;
// this #undef looks like a leftover from the Android NN original and could
// be removed.
#undef ANDROID_NN_CONV_PARAMETERS

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
#define __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__

#include <exec/IFunction.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{

// CPU backend kernel computing 2D convolution for FLOAT32 and QUANT8_ASYMM
// operands (dilation fixed to 1 in the implementation).
class ConvolutionLayer : public ::neurun::exec::IFunction
{
public:
  ConvolutionLayer();

public:
  void convFloat32();

  void convQuant8();

  // Records borrowed operand pointers, descriptors, padding/stride and the
  // fused activation used by run().
  // NOTE(review): strideW/strideH here are named strideWidth/strideHeight in
  // the definition -- harmless, but worth aligning.
  void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
                 const TensorDescriptor kernelDescr, uint8_t *biasData,
                 const TensorDescriptor biasDescr, const uint32_t paddingLeft,
                 const uint32_t paddingRight, const uint32_t paddingTop,
                 const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
                 const ir::Activation activation, uint8_t *outputData,
                 const TensorDescriptor outputDescr);

  void run();
  void runSync()
  {
    // this abstract method is used just for profiling and called for
    // backend::acl_common::AclFunction
    run();
  }

private:
  DataPtr _inputData;
  DataPtr _kernelData;
  DataPtr _outputData;
  DataPtr _biasData;

  TensorDescriptor _inputDescr;
  TensorDescriptor _kernelDescr;
  TensorDescriptor _outputDescr;
  TensorDescriptor _biasDescr;

  uint32_t _paddingLeft;
  uint32_t _paddingTop;
  uint32_t _paddingRight;
  uint32_t _paddingBottom;

  uint32_t _strideWidth;
  uint32_t _strideHeight;

  ir::Activation _activation;

  OperandType _inputType;
};

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__

/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "DepthwiseConvolutionLayer.h"

#include <cker/operation/DepthwiseConv.h>

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{

DepthwiseConvolutionLayer::DepthwiseConvolutionLayer()
    : _inputData(), _kernelData(), _outputData(), _biasData(), _inputDescr(), _kernelDescr(),
      _outputDescr(), _biasDescr(), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _multiplier(0),
      _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32)
{
  // DO NOTHING
}

// Float32 depthwise convolution (dilation fixed to 1); the fused activation
// becomes a clamp range passed to cker.
void DepthwiseConvolutionLayer::convFloat32()
{
  float output_activation_min, output_activation_max;
  CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::DepthwiseConvParams op_params;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = 1;
  op_params.dilation_height_factor = 1;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.depth_multiplier = _multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  nnfw::cker::DepthwiseConv(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
                            _inputData.f, convertTensorDescriptorToCkerShape(_kernelDescr),
                            _kernelData.f, convertTensorDescriptorToCkerShape(_biasDescr),
                            _biasData.f, convertTensorDescriptorToCkerShape(_outputDescr),
                            _outputData.f);
}

// Quantized-uint8 depthwise convolution. Input/weight offsets are passed
// negated; the output requantization multiplier/shift are derived from the
// operand scales. Bias is read as int32 (_biasData.i32).
void DepthwiseConvolutionLayer::convQuant8()
{
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
                                &output_activation_max);

  float real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  GetQuantizedConvolutionMultiplier(_inputDescr, _kernelDescr, _biasDescr, _outputDescr,
                                    &real_multiplier);
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  nnfw::cker::DepthwiseConvParams op_params;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = 1;
  op_params.dilation_height_factor = 1;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.depth_multiplier = _multiplier;
  op_params.input_offset = -_inputDescr.offset;
  op_params.weights_offset = -_kernelDescr.offset;
  op_params.output_offset = _outputDescr.offset;
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::DepthwiseConv(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
                            _inputData.u8, convertTensorDescriptorToCkerShape(_kernelDescr),
                            _kernelData.u8, convertTensorDescriptorToCkerShape(_biasDescr),
                            _biasData.i32, convertTensorDescriptorToCkerShape(_outputDescr),
                            _outputData.u8);
}

// Records borrowed operand pointers, descriptors, padding/stride, the depth
// multiplier and the fused activation used by run().
void DepthwiseConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
                                          uint8_t *kernelData, const TensorDescriptor kernelDescr,
                                          uint8_t *biasData, const TensorDescriptor biasDescr,
                                          const uint32_t paddingLeft, const uint32_t paddingRight,
                                          const uint32_t paddingTop, const uint32_t paddingBottom,
                                          const uint32_t strideWidth, const uint32_t strideHeight,
                                          const uint32_t multiplier,
                                          const ir::Activation activation, uint8_t *outputData,
                                          const TensorDescriptor outputDescr)
{
  _inputData.u8 = inputData;
  _inputDescr = inputDescr;
  _inputType = inputDescr.type;
  _kernelData.u8 = kernelData;
  _kernelDescr = kernelDescr;
  _biasData.u8 = biasData;
  _biasDescr = biasDescr;
  _paddingLeft = paddingLeft;
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _multiplier = multiplier;
  _activation = activation;
  _outputData.u8 = outputData;
  _outputDescr = outputDescr;
}

// Dispatches on the configured operand type; other types are silently a
// no-op.
void DepthwiseConvolutionLayer::run()
{
  if (_inputType == OperandType::FLOAT32)
  {
    convFloat32();
  }
  else if (_inputType == OperandType::QUANT8_ASYMM)
  {
    convQuant8();
  }
}

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// NOTE(review): guard name __NEURUN_KERNEL_CPU_... differs from the
// __NEURUN_BACKEND_CPU_KERNEL_... convention used by the sibling headers.
#ifndef __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__
#define __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__

#include <exec/IFunction.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{

// CPU backend kernel computing depthwise 2D convolution for FLOAT32 and
// QUANT8_ASYMM operands (dilation fixed to 1 in the implementation).
class DepthwiseConvolutionLayer : public ::neurun::exec::IFunction
{
public:
  DepthwiseConvolutionLayer();

public:
  void convFloat32();

  void convQuant8();

  // Records borrowed operand pointers, descriptors, padding/stride, depth
  // multiplier and the fused activation used by run().
  void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
                 const TensorDescriptor kernelDescr, uint8_t *biasData,
                 const TensorDescriptor biasDescr, const uint32_t paddingLeft,
                 const uint32_t paddingRight, const uint32_t paddingTop,
                 const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
                 const uint32_t multiplier, const ir::Activation activation, uint8_t *outputData,
                 const TensorDescriptor outputDescr);

  void run();
  void runSync()
  {
    // this abstract method is used just for profiling and called for
    // backend::acl_common::AclFunction
    run();
  }

private:
  DataPtr _inputData;
  DataPtr _kernelData;
  DataPtr _outputData;
  DataPtr _biasData;

  TensorDescriptor _inputDescr;
  TensorDescriptor _kernelDescr;
  TensorDescriptor _outputDescr;
  TensorDescriptor _biasDescr;

  uint32_t _paddingLeft;
  uint32_t _paddingTop;
  uint32_t _paddingRight;
  uint32_t _paddingBottom;

  uint32_t _strideWidth;
  uint32_t _strideHeight;

  uint32_t _multiplier;

  ir::Activation _activation;

  OperandType _inputType;
};

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

#endif // __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "FullyConnectedLayer.h"

#include <cker/operation/FullyConnected.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace cpu
{
namespace kernel
{

FullyConnectedLayer::FullyConnectedLayer()
    : _inputData(), _weightsData(), _biasData(), _outputData(), _inputDescr(), _weightsDescr(),
      _biasDescr(), _outputDescr(), _activation(ir::Activation::NONE),
      _inputType(OperandType::FLOAT32)
{
  // DO NOTHING
}

// Float32 fully-connected (matmul + bias); the fused activation becomes a
// clamp range passed to cker. Descriptors are extended to 4D via
// convertToExtendedCkerShape before being handed to the kernel.
void FullyConnectedLayer::fullyConnectedFloat32()
{
  float output_activation_min, output_activation_max;
  CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::FullyConnectedParams op_params;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  nnfw::cker::FullyConnected(op_params, convertToExtendedCkerShape(_inputDescr), _inputData.f,
                             convertToExtendedCkerShape(_weightsDescr), _weightsData.f,
                             convertToExtendedCkerShape(_biasDescr), _biasData.f,
                             convertToExtendedCkerShape(_outputDescr), _outputData.f);
}

// executionMutex is used to protect concurrent access of non-threadsafe resources
// like gemmlowp::GemmContext.
// NOTE(review): no executionMutex exists in this file -- the comment above
// looks inherited from the Android NN original; confirm whether any locking
// is still required around the quantized path.
void FullyConnectedLayer::fullyConnectedQuant8()
{
  float real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  GetQuantizedConvolutionMultiplier(_inputDescr, _weightsDescr, _biasDescr, _outputDescr,
                                    &real_multiplier);
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
  CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
                                &output_activation_max);

  nnfw::cker::FullyConnectedParams op_params;
  op_params.input_offset = -_inputDescr.offset;
  op_params.weights_offset = -_weightsDescr.offset;
  op_params.output_offset = _outputDescr.offset;
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::FullyConnected(op_params, convertToExtendedCkerShape(_inputDescr), _inputData.u8,
                             convertToExtendedCkerShape(_weightsDescr), _weightsData.u8,
                             convertToExtendedCkerShape(_biasDescr), _biasData.i32,
                             convertToExtendedCkerShape(_outputDescr), _outputData.u8);
}

// Records borrowed operand pointers, descriptors and the fused activation.
// The input type recorded from inputDescr selects the path taken by run().
void FullyConnectedLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
                                    uint8_t *weightsData, const TensorDescriptor weightsDescr,
                                    uint8_t *biasData, const TensorDescriptor biasDescr,
                                    ir::Activation activation, uint8_t *outputData,
                                    const TensorDescriptor outputDescr)
{
  _inputData.u8 = inputData;
  _inputDescr = inputDescr;
  _inputType = inputDescr.type;
  _weightsData.u8 = weightsData;
  _weightsDescr = weightsDescr;
  _biasData.u8 = biasData;
  _biasDescr = biasDescr;
  _activation = activation;
  _outputData.u8 = outputData;
  _outputDescr = outputDescr;
}

// Dispatches on the configured operand type; other types are silently a
// no-op.
void FullyConnectedLayer::run()
{
  if (_inputType == OperandType::FLOAT32)
  {
    fullyConnectedFloat32();
  }
  else if (_inputType == OperandType::QUANT8_ASYMM)
  {
    fullyConnectedQuant8();
  }
}

} // namespace kernel
} // namespace cpu
} // namespace backend
} // namespace neurun

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class FullyConnectedLayer : public ::neurun::exec::IFunction +{ +public: + FullyConnectedLayer(); + +public: + void fullyConnectedFloat32(); + + void fullyConnectedQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *weightsData, + const TensorDescriptor weightsDescr, uint8_t *biasData, + const TensorDescriptor biasDescr, ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _weightsData; + DataPtr _biasData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _weightsDescr; + TensorDescriptor _biasDescr; + TensorDescriptor _outputDescr; + + ir::Activation _activation; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/GatherLayer.cc b/runtime/neurun/backend/cpu/kernel/GatherLayer.cc new file mode 100644 index 000000000..b29acba79 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/GatherLayer.cc @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GatherLayer.h" + +#include <cker/operation/Gather.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +void GatherLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr, + uint8_t *indicesData, const TensorDescriptor &indicesDescr, + uint8_t *outputData, const TensorDescriptor &outputDescr, int32_t axis) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _indicesData.u8 = indicesData; + _indicesDescr = indicesDescr; + _axis = axis; + _inputType = inputDescr.type; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void GatherLayer::run() +{ + nnfw::cker::GatherParams op_params; + op_params.axis = _axis; + + switch (_inputType) + { + case OperandType::FLOAT32: + nnfw::cker::Gather<float>(op_params, convertTensorDescriptorToCkerShape(_inputDescr), + _inputData.f, convertTensorDescriptorToCkerShape(_indicesDescr), + _indicesData.i32, convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.f); + break; + case OperandType::QUANT8_ASYMM: + nnfw::cker::Gather<uint8_t>(op_params, convertTensorDescriptorToCkerShape(_inputDescr), + _inputData.u8, convertTensorDescriptorToCkerShape(_indicesDescr), + _indicesData.i32, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8); + break; + case OperandType::INT32: + nnfw::cker::Gather<int32_t>( + op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.i32, + convertTensorDescriptorToCkerShape(_indicesDescr), _indicesData.i32, + 
convertTensorDescriptorToCkerShape(_outputDescr), _outputData.i32); + break; + default: + throw std::runtime_error("Gather NYI for this operand type!"); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/GatherLayer.h b/runtime/neurun/backend/cpu/kernel/GatherLayer.h new file mode 100644 index 000000000..af4f8b8f6 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/GatherLayer.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class GatherLayer : public ::neurun::exec::IFunction +{ +public: + GatherLayer() + : _inputData{nullptr}, _indicesData{nullptr}, _outputData{nullptr}, _axis{-1}, + _inputType{OperandType::FLOAT32} + { + // DO NOTHING + } + +public: + void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *indicesData, + const TensorDescriptor &indicesDescr, uint8_t *outputData, + const TensorDescriptor &outputDescr, int32_t axis); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _indicesData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _indicesDescr; + TensorDescriptor _outputDescr; + + int32_t _axis; + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc b/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc new file mode 100644 index 000000000..d9916964e --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "LogisticLayer.h" + +#include <cker/operation/Logistic.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +LogisticLayer::LogisticLayer() + : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void LogisticLayer::logisticFloat32() +{ + nnfw::cker::Logistic(convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); +} + +void LogisticLayer::logisticQuant8() +{ + // cker quant8 logistic is not implemented yet + throw std::runtime_error{"NYI"}; +} + +void LogisticLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr, + uint8_t *outputData, const TensorDescriptor &outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void LogisticLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + logisticFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + logisticQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/LogisticLayer.h b/runtime/neurun/backend/cpu/kernel/LogisticLayer.h new file mode 100644 index 000000000..33fcd6fed --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/LogisticLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class LogisticLayer : public ::neurun::exec::IFunction +{ +public: + LogisticLayer(); + +public: + void logisticFloat32(); + + void logisticQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *outputData, + const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc new file mode 100644 index 000000000..095cd6d1d --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MaxPoolLayer.h" + +#include <cker/operation/MaxPool.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +#define MAXPOOLING_PARAMETERS \ + nnfw::cker::PoolParams op_params; \ + op_params.stride_height = _strideHeight; \ + op_params.stride_width = _strideWidth; \ + op_params.filter_height = _kernelHeight; \ + op_params.filter_width = _kernelWidth; \ + op_params.padding_values.height = (int8_t)_paddingTop; \ + op_params.padding_values.width = (int8_t)_paddingLeft; + +MaxPoolLayer::MaxPoolLayer() + : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _paddingLeft(0), _paddingTop(0), + _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), + _kernelHeight(0), _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void MaxPoolLayer::maxPoolFloat32() +{ + MAXPOOLING_PARAMETERS + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::cker::MaxPool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); +} +void 
MaxPoolLayer::maxPoolQuant8() +{ + MAXPOOLING_PARAMETERS + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + nnfw::cker::MaxPool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8); +} + +void MaxPoolLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t kernelWidth, const uint32_t kernelHeight, + const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr) +{ + _inputData.u8 = inputData; + + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _kernelWidth = kernelWidth; + _kernelHeight = kernelHeight; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void MaxPoolLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + maxPoolFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + maxPoolQuant8(); + } +} + +#undef MAXPOOLING_PARAMETERS + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h new file mode 100644 index 000000000..88a574c42 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class MaxPoolLayer : public ::neurun::exec::IFunction +{ +public: + MaxPoolLayer(); + +public: + void maxPoolFloat32(); + + void maxPoolQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _kernelWidth; + uint32_t _kernelHeight; + + ir::Activation _activation; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} 
// namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.cc b/runtime/neurun/backend/cpu/kernel/MulLayer.cc new file mode 100644 index 000000000..d6ce2cfad --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/MulLayer.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MulLayer.h" + +#include <cker/operation/BinaryArithmeticOps.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +void MulLayer::mulFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam op_params; + op_params.float_activation_max = output_activation_max; + op_params.float_activation_min = output_activation_min; + + const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) { + return a * b; + }; + + if (!HaveSameShapes(&_lhsDescr, &_rhsDescr)) + { + nnfw::cker::BroadcastBinaryArithmeticOpSlow( + op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f, + convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr), + _outputData.f, fn); + return; + } + + nnfw::cker::BinaryArithmeticOp(op_params, 
convertTensorDescriptorToCkerShape(_lhsDescr),
+                                 _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+                                 _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+                                 _outputData.f, fn);
+}
+
+void MulLayer::mulQuant8()
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+                                &output_activation_max);
+  // nnfw::cker::BinaryArithmeticOpParam op_params;
+  // op_params.quantized_activation_max = output_activation_max;
+  // op_params.quantized_activation_min = output_activation_min;
+
+  // cker quant8 mul is not implemented yet
+  throw std::runtime_error{"Mul NYI for quantized"};
+}
+
+void MulLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+                         const TensorDescriptor &rhsDescr, const ir::Activation activation,
+                         uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+  _lhsData.u8 = lhsData;
+  _lhsDescr = lhsDescr;
+  _rhsData.u8 = rhsData;
+  _rhsDescr = rhsDescr;
+  _inputType = lhsDescr.type;
+  _activation = activation;
+  _outputData.u8 = outputData;
+  _outputDescr = outputDescr;
+}
+
+void MulLayer::run()
+{
+  if (_inputType == OperandType::FLOAT32)
+  {
+    mulFloat32();
+  }
+  else if (_inputType == OperandType::QUANT8_ASYMM)
+  {
+    mulQuant8();
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.h b/runtime/neurun/backend/cpu/kernel/MulLayer.h
new file mode 100644
index 000000000..05fc3052f
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/MulLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class MulLayer : public ::neurun::exec::IFunction +{ +public: + MulLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr() + { + // DO NOTHING + } + +public: + void mulFloat32(); + + void mulQuant8(); + + void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _lhsData; + DataPtr _rhsData; + DataPtr _outputData; + + TensorDescriptor _lhsDescr; + TensorDescriptor _rhsDescr; + TensorDescriptor _outputDescr; + + ir::Activation _activation{ir::Activation::NONE}; + + OperandType _inputType{OperandType::FLOAT32}; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/OperationUtils.cc b/runtime/neurun/backend/cpu/kernel/OperationUtils.cc new file mode 100644 index 000000000..8aa15dcbd --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/OperationUtils.cc @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2018 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OperationUtils.h" + +#include <cmath> +#include <algorithm> +#include <cassert> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +uint32_t getNumberOfDimensions(const TensorDescriptor &descr) { return descr.dimensions.size(); } + +uint32_t getNumberOfElements(const TensorDescriptor &descr) +{ + uint32_t count = 1; + for (size_t i = 0; i < descr.dimensions.size(); i++) + { + count *= descr.dimensions[i]; + } + return count; +} + +uint32_t getSizeOfDimension(const TensorDescriptor &descr, uint32_t dimensionIdx) +{ + if (dimensionIdx >= descr.dimensions.size()) + { + // TODO, log the error + return 0; + } + return descr.dimensions[dimensionIdx]; +} + +void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) +{ + if (double_multiplier == 0.) 
+ { + *quantized_multiplier = 0; + *shift = 0; + return; + } + const double q = std::frexp(double_multiplier, shift); + auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + + assert(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) + { + q_fixed /= 2; + ++*shift; + } + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + *quantized_multiplier = static_cast<int32_t>(q_fixed); +} + +void GetQuantizedConvolutionMultiplier(const TensorDescriptor &inputDescr, + const TensorDescriptor &filterDescr, + const TensorDescriptor &biasDescr, + const TensorDescriptor &outputDescr, float *multiplier) +{ + const float input_product_scale = inputDescr.scale * filterDescr.scale; + const float bias_scale = biasDescr.scale; + const float output_scale = outputDescr.scale; + // The following conditions must be guaranteed by the training pipeline. + UNUSED_RELEASE(bias_scale); + assert(std::abs(input_product_scale - bias_scale) <= + 1e-6 * std::min(input_product_scale, bias_scale)); + assert(input_product_scale >= 0); + assert(input_product_scale < output_scale); + *multiplier = input_product_scale / output_scale; +} + +void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift) +{ + assert(double_multiplier > 1.); + const double q = std::frexp(double_multiplier, left_shift); + int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + assert(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) + { + q_fixed /= 2; + ++*left_shift; + } + assert(*left_shift >= 0); + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + *quantized_multiplier = static_cast<int32_t>(q_fixed); +} + +void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min, + float *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0.f; + *activation_max = std::numeric_limits<float>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0.f; + 
*activation_max = 6.f; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1.f; + *activation_max = 1.f; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0.f; + *activation_max = 1.f; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<float>::lowest(); + *activation_max = std::numeric_limits<float>::max(); + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +void CalculateActivationRangeUint8(ir::Activation activation, const TensorDescriptor &outputDescr, + int32_t *act_min, int32_t *act_max) +{ + const int32_t qmin = std::numeric_limits<uint8_t>::min(); + const int32_t qmax = std::numeric_limits<uint8_t>::max(); + const auto scale = outputDescr.scale; + const auto zero_point = outputDescr.offset; + auto quantize = [scale, zero_point](float f) { + return zero_point + static_cast<int32_t>(std::round(f / scale)); + }; + if (activation == ir::Activation::RELU) + { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = qmax; + } + else if (activation == ir::Activation::RELU6) + { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = std::min(qmax, quantize(6.0)); + } + else if (activation == ir::Activation::RELU1) + { + *act_min = std::max(qmin, quantize(-1.0)); + *act_max = std::min(qmax, quantize(1.0)); + } + else if (activation == ir::Activation::SIGMOID) + { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = std::min(qmax, quantize(1.0)); + } + else if (activation == ir::Activation::NONE) + { + *act_min = qmin; + *act_max = qmax; + } + else + { + std::cout << "Unsupported fused activation function." 
<< std::endl;
+  }
+}
+
+bool HaveSameShapes(const TensorDescriptor *input1, const TensorDescriptor *input2)
+{
+  if (input1 == input2)
+    return true;
+  if (input1 == NULL || input2 == NULL)
+    return false;
+
+  if (input1 == NULL)
+  {
+    return (getNumberOfDimensions(*input2) == 0);
+  }
+
+  if (getNumberOfDimensions(*input1) != getNumberOfDimensions(*input2))
+    return false;
+
+  for (uint32_t i = 0; i < getNumberOfDimensions(*input1); i++)
+    if (input1->dimensions[i] != input2->dimensions[i])
+      return false;
+
+  return true;
+}
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
+{
+  const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
+                                    (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
+  // Tighten bound using floor. Suppose that we could use the exact value.
+  // After scaling the difference, the result would be at the maximum. Thus we
+  // must ensure that our value has lower magnitude.
+  return static_cast<int32_t>(std::floor(max_input_rescaled));
+}
+
+TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout)
+{
+  TensorDescriptor descriptor;
+
+  auto dims = o.shape().dims();
+  if (frontend_layout == ir::Layout::NCHW && o.shape().rank() == 4)
+  {
+    // NCHW -> NHWC
+    uint32_t permutation[4] = {0, 2, 3, 1};
+    for (int i = 0; i < o.shape().rank(); ++i)
+    {
+      dims.at(i) = o.shape().dim(permutation[i]);
+    }
+  }
+  descriptor.dimensions = std::vector<uint32_t>(dims.begin(), dims.end());
+  descriptor.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type()));
+  descriptor.scale = o.typeInfo().scale();
+  descriptor.offset = o.typeInfo().offset();
+
+  // CPU backend assume that neurun internal shape's rank is always same or less than 4
+  assert(descriptor.dimensions.size() <= 4);
+
+  return descriptor;
+}
+
+uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions)
+{
+  uint32_t size = 4;
+
+  switch (type)
+  {
+    case OperandType::FLOAT32:
+    case 
OperandType::INT32: + case OperandType::UINT32: + size = 4; + break; + case OperandType::BOOL8: + case OperandType::QUANT8_ASYMM: + case OperandType::QUANT8_SYMM: + size = 1; + break; + default: + throw std::runtime_error("Not supported operand type."); + break; + } + + for (auto d : dimensions) + { + size *= d; + } + + return size; +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/OperationUtils.h b/runtime/neurun/backend/cpu/kernel/OperationUtils.h new file mode 100644 index 000000000..b9e8c8974 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/OperationUtils.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ +#define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ + +#include <iostream> +#include <limits> +#include <vector> + +#include <cker/Shape.h> + +#include "ir/Operand.h" +#include "ir/DataType.h" +#include <ir/InternalType.h> + +using OperandType = neurun::ir::DataType; + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +struct TensorDescriptor +{ + OperandType type; + std::vector<uint32_t> dimensions; + float scale; + int32_t offset; +}; + +union DataPtr { + uint8_t *u8; + int8_t *i8; + int32_t *i32; + float *f; + void *v; +}; + +uint32_t getNumberOfDimensions(const TensorDescriptor &descr); + +uint32_t getNumberOfElements(const TensorDescriptor &descr); + +uint32_t getSizeOfDimension(const TensorDescriptor &descr, uint32_t dimensionIdx); + +inline nnfw::cker::Shape convertToExtendedCkerShape(const TensorDescriptor &descr) +{ + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + uint32_t src = 4 - descr.dimensions.size(); + for (uint32_t i = 0; i < 4; ++i) + { + if (i < src) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = descr.dimensions[i - src]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +inline nnfw::cker::Shape convertTensorDescriptorToCkerShape(const TensorDescriptor &descr) +{ + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= descr.dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = descr.dimensions[i]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +inline int32_t getAxis(uint32_t rank, int32_t axis, ir::Layout frontend_layout) +{ + auto ret = axis; + + if (axis < 0) + { + ret += rank; + } + + // NCHW -> NHWC + if (frontend_layout == ir::Layout::NCHW) + { + int32_t permutation[4] = {0, 3, 1, 2}; + ret = permutation[ret]; + } + + return ret; +} + +void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); + +void 
GetQuantizedConvolutionMultiplier(const TensorDescriptor &inputDescr, + const TensorDescriptor &filterDescr, + const TensorDescriptor &biasDescr, + const TensorDescriptor &outputDescr, float *multiplier); + +void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift); + +void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min, + float *activation_max); + +void CalculateActivationRangeUint8(ir::Activation activation, const TensorDescriptor &outputDescr, + int32_t *act_min, int32_t *act_max); + +bool HaveSameShapes(const TensorDescriptor *input1, const TensorDescriptor *input2); + +int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift); + +TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout); + +uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions); + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ diff --git a/runtime/neurun/backend/cpu/kernel/PadLayer.cc b/runtime/neurun/backend/cpu/kernel/PadLayer.cc new file mode 100644 index 000000000..1fd9429b5 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/PadLayer.cc @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PadLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Pad.h> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +PadLayer::PadLayer() + : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _padData(), _padRank(), + _constantValueData(), _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void PadLayer::padFloat32() +{ + nnfw::cker::Pad(_padData, _padRank, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f, + _constantValueData.f); +} +void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); } + +void PadLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *outputData, + const TensorDescriptor outputDescr, const int32_t *padData, + int32_t padRank, uint8_t *constantValueData) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _outputData.u8 = outputData; + _outputDescr = outputDescr; + _padData = padData; + _padRank = padRank; + _constantValueData.u8 = constantValueData; +} + +void PadLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + padFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + padQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/PadLayer.h b/runtime/neurun/backend/cpu/kernel/PadLayer.h new file mode 100644 index 000000000..f4413a8ed --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/PadLayer.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +// Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and `SYMMETRIC` +class PadLayer : public ::neurun::exec::IFunction +{ +public: + PadLayer(); + +public: + void padFloat32(); + + void padQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *outputData, + const TensorDescriptor outputDescr, const int32_t *padData, int32_t padRank, + uint8_t *constantValueData = nullptr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + const int32_t *_padData; + int32_t _padRank; + DataPtr _constantValueData; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc b/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc new file mode 100644 index 000000000..6f28d8436 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+using Type = ir::operation::Permute::Type;
+
+// Store the tensors and permutation parameters. Tensors are held by
+// shared_ptr, so they stay alive for the lifetime of this function object;
+// no data is moved until run().
+void PermuteLayer::configure(std::shared_ptr<backend::operand::ITensor> input,
+                             std::shared_ptr<backend::operand::ITensor> output,
+                             const ir::Shape &output_shape, Type type, ir::DataType dataType)
+{
+  _input = input;
+  _output = output;
+  _output_shape = output_shape;
+  _type = type;
+  _dataType = dataType;
+}
+
+// Dispatch to the element-type-specialized implementation. Permutation only
+// moves elements, so types of the same width share one instantiation
+// (BOOL8 and QUANT8_ASYMM both run as uint8_t).
+void PermuteLayer::run()
+{
+  using ir::DataType;
+  switch (_dataType)
+  {
+    case DataType::FLOAT32:
+      runTempl<float>();
+      break;
+    case DataType::INT32:
+      runTempl<int32_t>();
+      break;
+    case DataType::UINT32:
+      runTempl<uint32_t>();
+      break;
+    case DataType::BOOL8:
+    case DataType::QUANT8_ASYMM:
+      runTempl<uint8_t>();
+      break;
+    case DataType::QUANT8_SYMM:
+      runTempl<int8_t>();
+      break;
+    default:
+      // Any data type not listed above is not supported by this kernel.
+      throw std::runtime_error("NYI");
+      break;
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/PermuteLayer.h b/runtime/neurun/backend/cpu/kernel/PermuteLayer.h
new file mode 100644
index 000000000..1f9110807
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/PermuteLayer.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__ + +#include <exec/IFunction.h> + +#include "util/feature/nhwc/View.h" +#include "OperationUtils.h" +#include "ir/operation/Permute.h" +#include "util/feature/nhwc/Reader.h" +#include "util/feature/nchw/View.h" +#include "util/Coordinates.h" + +#include <misc/feature/IndexIterator.h> +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class PermuteLayer : public ::neurun::exec::IFunction +{ +public: + PermuteLayer() = default; + +public: + void configure(std::shared_ptr<backend::operand::ITensor> input, + std::shared_ptr<backend::operand::ITensor> output, const ir::Shape &output_shape, + ir::operation::Permute::Type type, ir::DataType dataType); + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + template <class T> void runTempl() + { + auto rank = _output_shape.rank(); + auto fn = [&](::neurun::backend::operand::ITensor &in_tensor) { + _output->access([&](::neurun::backend::operand::ITensor &out_tensor) { + auto input_buffer = in_tensor.buffer(); + auto input_size = in_tensor.total_size(); + auto output_buffer = out_tensor.buffer(); + if (_type == ir::operation::Permute::Type::COPY) + { + assert(in_tensor.layout() == 
out_tensor.layout()); + if (!in_tensor.has_padding() && !out_tensor.has_padding()) + { + assert(input_size == out_tensor.total_size()); + memcpy(output_buffer, input_buffer, input_size); + return; + } + } + switch (rank) + { + case 0: + case 1: + { + const int32_t copy_len = _output_shape.dim(0); + + memcpy(output_buffer, input_buffer, copy_len); + break; + } + case 2: + { + const int32_t copy_len = _output_shape.dim(1); + + for (auto i = 0; i < _output_shape.dim(0); ++i) + { + neurun::util::Coordinates coords{i, 0}; + memcpy(output_buffer + out_tensor.calcOffset(coords), + input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T)); + } + break; + } + case 3: + { + const int32_t copy_len = _output_shape.dim(2); + + for (auto i = 0; i < _output_shape.dim(0); ++i) + { + for (auto j = 0; j < _output_shape.dim(1); ++j) + { + neurun::util::Coordinates coords{i, j, 0}; + memcpy(output_buffer + out_tensor.calcOffset(coords), + input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T)); + } + } + break; + } + case 4: + { + // TODO Unify permute type and remove switch case + switch (_type) + { + case ir::operation::Permute::Type::NHWC_TO_NCHW: + { + for (auto n = 0; n < _output_shape.dim(0); ++n) + { + for (auto c = 0; c < _output_shape.dim(1); ++c) + { + for (auto h = 0; h < _output_shape.dim(2); ++h) + { + for (auto w = 0; w < _output_shape.dim(3); ++w) + { + const neurun::util::Coordinates in_coords{n, h, w, c}; + const auto out_coords = + convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout()); + const auto value = + *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords)); + *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) = + value; + } + } + } + } + break; + } + case ir::operation::Permute::Type::NCHW_TO_NHWC: + { + for (auto n = 0; n < _output_shape.dim(0); ++n) + { + for (auto h = 0; h < _output_shape.dim(1); ++h) + { + for (auto w = 0; w < _output_shape.dim(2); ++w) + { + for (auto c = 0; c < 
_output_shape.dim(3); ++c) + { + const neurun::util::Coordinates in_coords{n, c, h, w}; + const auto out_coords = + convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout()); + const auto value = + *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords)); + *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) = + value; + } + } + } + } + break; + } + case ir::operation::Permute::Type::COPY: + { + const int32_t copy_len = _output_shape.dim(3); + + for (auto i = 0; i < _output_shape.dim(0); ++i) + { + for (auto j = 0; j < _output_shape.dim(1); ++j) + { + for (auto k = 0; k < _output_shape.dim(2); ++k) + { + neurun::util::Coordinates coords{i, j, k, 0}; + memcpy(output_buffer + out_tensor.calcOffset(coords), + input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T)); + } + } + } + break; + } + default: + throw std::runtime_error("NYI"); + break; + } + break; + } + default: + throw std::runtime_error("NYI"); + break; + } + }); + }; + _input->access(fn); + } + +private: + std::shared_ptr<backend::operand::ITensor> _input{nullptr}; + std::shared_ptr<backend::operand::ITensor> _output{nullptr}; + ir::Shape _output_shape{}; + ir::operation::Permute::Type _type{ir::operation::Permute::Type::COPY}; + ir::DataType _dataType{ir::DataType::FLOAT32}; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc new file mode 100644 index 000000000..caeee9f12 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReshapeLayer.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+ReshapeLayer::ReshapeLayer() : _inputData(), _outputData(), _inputDescr(), _outputDescr()
+{
+  // DO NOTHING
+}
+
+// Reshape is a pure byte copy: element order is unchanged, only the shape
+// metadata differs. The copy size is computed from the INPUT descriptor only;
+// assumes the output buffer holds at least the same number of bytes —
+// TODO confirm the caller guarantees equal total sizes.
+void ReshapeLayer::reshapeGeneric()
+{
+  size_t count = sizeOfData(_inputDescr.type, _inputDescr.dimensions);
+  memcpy(_outputData.v, _inputData.v, count);
+}
+
+// Store raw buffers and descriptors. Pointers are borrowed (not copied) and
+// must remain valid until run() has completed.
+void ReshapeLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
+                             uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+  _inputData.u8 = inputData;
+  _inputDescr = inputDescr;
+  _outputData.u8 = outputData;
+  _outputDescr = outputDescr;
+}
+
+void ReshapeLayer::run() { reshapeGeneric(); }
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h
new file mode 100644
index 000000000..25dd851b2
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class ReshapeLayer : public ::neurun::exec::IFunction +{ +public: + ReshapeLayer(); + +public: + void reshapeGeneric(); + + void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *outputData, + const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc new file mode 100644 index 000000000..58ba109b4 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SoftMaxLayer.h" + +#include <cker/operation/SoftMax.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +SoftMaxLayer::SoftMaxLayer() + : _inputData(), _outputData(), _beta(0.0), _inputDescr(), _outputDescr(), + _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +// Performs softmax along the input of size (input_size * batch_size). +void Softmax(const float *in, const int input_size, const int batch_size, const float beta, + float *out) +{ + assert(input_size > 0); + + // For each batch + for (int b = 0; b < batch_size; b++) + { + // Find the max coeff. + float max_coeff = in[0]; + for (int i = 1; i < input_size; i++) + { + if (in[i] > max_coeff) + max_coeff = in[i]; + } + + // Compute the normalized sum of exps. + float exp_sum = 0.0; + for (int i = 0; i < input_size; i++) + { + out[i] = std::exp((in[i] - max_coeff) * beta); + exp_sum += out[i]; + } + + // Divide by the sum of exps. + float reciprocal_sum_exp = 1.f / exp_sum; + for (int i = 0; i < input_size; i++) + { + out[i] *= reciprocal_sum_exp; + } + + // Advance in and out pointers for the next batch. 
+ in += input_size; + out += input_size; + } +} + +void SoftMaxLayer::softmaxFloat32() +{ + TensorDescriptor descrIn4D; + + if (getNumberOfDimensions(_inputDescr) == 2) + { + uint32_t batch_size = getSizeOfDimension(_inputDescr, 0); + if (batch_size == 0) + throw std::runtime_error("batch_size should not be 0"); + + uint32_t input_size = getNumberOfElements(_inputDescr) / batch_size; + Softmax(_inputData.f, input_size, batch_size, _beta, _outputData.f); + } + else if (getNumberOfDimensions(_inputDescr) == 4) + { + nnfw::cker::SoftmaxParams op_params; + op_params.beta = _beta; + nnfw::cker::Softmax(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); + } + else + { + throw std::runtime_error{"only 2D and 4D tensors supported"}; + } +} + +void SoftMaxLayer::softmaxQuant8() +{ + TensorDescriptor descrIn4D = _inputDescr; + + if (getNumberOfDimensions(_inputDescr) == 2) + { + uint32_t batch_size = getSizeOfDimension(_inputDescr, 0); + if (batch_size == 0) + throw std::runtime_error("batch_size should not be 0"); + + uint32_t input_size = getNumberOfElements(_inputDescr) / batch_size; + descrIn4D.dimensions = {batch_size, 1, 1, input_size}; + } + else if (getNumberOfDimensions(_inputDescr) == 4) + { + descrIn4D = _inputDescr; + } + else + { + throw std::runtime_error{"only 2D and 4D tensors supported"}; + } + if (_outputDescr.offset != 0 || _outputDescr.scale != 1.f / 256) + { + throw std::runtime_error{"incorrect scale / offset for output"}; + } + static const int32_t kScaledDiffIntegerBits = 5; + const double input_beta_real_multiplier = std::min( + 1.0 * _beta * _inputDescr.scale * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0); + int32_t input_multiplier = 0; + int32_t input_left_shift = 0; + QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier, + &input_left_shift); + float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, 
input_left_shift); + + nnfw::cker::SoftmaxParams op_params; + op_params.input_multiplier = input_multiplier; + op_params.input_left_shift = input_left_shift; + op_params.diff_min = diff_min; + nnfw::cker::Softmax(op_params, convertTensorDescriptorToCkerShape(descrIn4D), _inputData.u8, + convertTensorDescriptorToCkerShape(descrIn4D), _outputData.u8); +} + +void SoftMaxLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr, + const float beta, uint8_t *outputData, + const TensorDescriptor &outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _outputData.u8 = outputData; + _outputDescr = outputDescr; + _beta = beta; +} + +void SoftMaxLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + softmaxFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + softmaxQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h new file mode 100644 index 000000000..4723afb72 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class SoftMaxLayer : public ::neurun::exec::IFunction +{ +public: + SoftMaxLayer(); + +public: + void softmaxFloat32(); + + void softmaxQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, const float beta, + uint8_t *outputData, const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + float _beta; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.cc b/runtime/neurun/backend/cpu/kernel/SubLayer.cc new file mode 100644 index 000000000..c6f7188e0 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/SubLayer.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "SubLayer.h" + +#include <cker/operation/BinaryArithmeticOps.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +void SubLayer::subFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam op_params; + op_params.float_activation_max = output_activation_max; + op_params.float_activation_min = output_activation_min; + const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) { + return a - b; + }; + + if (!HaveSameShapes(&_lhsDescr, &_rhsDescr)) + { + nnfw::cker::BroadcastBinaryArithmeticOpSlow( + op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f, + convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr), + _outputData.f, fn); + return; + } + + nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr), + _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr), + _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.f, fn); +} + +void SubLayer::subQuant8() +{ + int32_t output_activation_min, output_activation_max; + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + // nnfw::cker::SubParam op_params; + // op_params.quantized_activation_max = output_activation_max; + // op_params.quantized_activation_min = output_activation_min; + + // cker quant8 sub is not implemented yet + throw std::runtime_error{"NYI"}; +} + +void SubLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr) +{ + _lhsData.u8 = lhsData; + _lhsDescr = lhsDescr; + _rhsData.u8 = rhsData; + _rhsDescr = rhsDescr; + _inputType = 
lhsDescr.type; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void SubLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + subFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + subQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.h b/runtime/neurun/backend/cpu/kernel/SubLayer.h new file mode 100644 index 000000000..c9abdb48c --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/SubLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class SubLayer : public ::neurun::exec::IFunction +{ +public: + SubLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr() + { + // DO NOTHING + } + +public: + void subFloat32(); + + void subQuant8(); + + void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _lhsData; + DataPtr _rhsData; + DataPtr _outputData; + + TensorDescriptor _lhsDescr; + TensorDescriptor _rhsDescr; + TensorDescriptor _outputDescr; + + ir::Activation _activation{ir::Activation::NONE}; + + OperandType _inputType{OperandType::FLOAT32}; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__ diff --git a/runtime/neurun/backend/cpu/operand/Tensor.cc b/runtime/neurun/backend/cpu/operand/Tensor.cc new file mode 100644 index 000000000..21d4a9dce --- /dev/null +++ b/runtime/neurun/backend/cpu/operand/Tensor.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace operand
+{
+
+// Compute the BYTE offset of the element at `coords` assuming a dense
+// row-major layout (no padding): the element index is accumulated with
+// Horner's scheme over the dimensions, then scaled by the element size.
+// Assumes coords provides an entry for every dimension (indices 0..rank-1)
+// and each coords[i] is within dimension(i) — TODO confirm callers guarantee
+// this; it is not checked here.
+size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const
+{
+  size_t rank = num_dimensions();
+  size_t offset = 0;
+  for (size_t i = 0; i < rank; ++i)
+  {
+    offset = offset * dimension(i) + coords[i];
+  }
+  offset *= sizeOfDataType(data_type());
+  return offset;
+}
+
+// CPU tensors are plain host memory, so "access" is just a direct call of
+// the functor on this tensor (no mapping/synchronization is performed).
+void Tensor::access(const std::function<void(ITensor &)> &fn) { fn(*this); }
+
+} // namespace operand
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/operand/Tensor.h b/runtime/neurun/backend/cpu/operand/Tensor.h
new file mode 100644
index 000000000..dec680873
--- /dev/null
+++ b/runtime/neurun/backend/cpu/operand/Tensor.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__ +#define __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__ + +#include <backend/operand/ITensor.h> +#include "ir/OperandInfo.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace operand +{ + +class Tensor : public ::neurun::backend::operand::ITensor +{ +public: + Tensor() = delete; + +public: + Tensor(const ir::OperandInfo &info) : _info(info) + { + // DO NOTHING + } + +public: + void setBuffer(uint8_t *buffer) { _buffer = buffer; } + ir::DataType data_type() const { return _info.typeInfo().type(); } + +public: + uint8_t *buffer() const override { return _buffer; } + /** + * @brief Get dimension by index + * + * @param index Index to get diemension + * @return size_t Dimension at index + * @note N : dimension(0) + * H : dimension(1) + * W : dimension(2) + * C : dimension(3) + */ + size_t dimension(size_t index) const override { return _info.shape().dim(index); } + size_t num_dimensions() const override { return _info.shape().rank(); } + size_t total_size() const override { return _info.total_size(); } + size_t calcOffset(const neurun::util::Coordinates &coords) const override; + ir::Layout layout() const override { return ir::Layout::NHWC; } + bool has_padding() const override { return false; } + void access(const std::function<void(ITensor &tensor)> &fn) final; + +private: + ir::OperandInfo _info; + uint8_t *_buffer = nullptr; +}; + +} // namespace operand +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__ diff --git a/runtime/neurun/backend/cpu_common/CMakeLists.txt b/runtime/neurun/backend/cpu_common/CMakeLists.txt new file mode 100644 index 000000000..5b6161ba7 --- /dev/null +++ b/runtime/neurun/backend/cpu_common/CMakeLists.txt @@ -0,0 +1,28 @@ +file(GLOB SOURCES "*.cc") +file(GLOB_RECURSE TESTS "*.test.cc") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_library(${LIB_NEURUN_BACKEND_CPU_COMMON} STATIC ${SOURCES}) + 
+target_include_directories(${LIB_NEURUN_BACKEND_CPU_COMMON} PUBLIC ${NEURUN_INCLUDE_DIR}) +target_include_directories(${LIB_NEURUN_BACKEND_CPU_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PUBLIC nnfw_lib_cpp14) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PRIVATE nnfw_lib_misc) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PRIVATE neurun_core) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PRIVATE nnfw_common) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PRIVATE nnfw_coverage) + +set_target_properties(${LIB_NEURUN_BACKEND_CPU_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON) +set_target_properties(${LIB_NEURUN_BACKEND_CPU_COMMON} PROPERTIES OUTPUT_NAME backend_cpu_common) + +# Unit Tests +set(TEST_NEURUN_BACKEND_CPU_COMMON test_neurun_backend_cpu_common) + +add_executable(${TEST_NEURUN_BACKEND_CPU_COMMON} ${TESTS}) + +target_link_libraries(${TEST_NEURUN_BACKEND_CPU_COMMON} ${LIB_NEURUN_BACKEND_CPU_COMMON}) +target_link_libraries(${TEST_NEURUN_BACKEND_CPU_COMMON} gtest gtest_main dl ${LIB_PTHREAD}) +target_include_directories(${TEST_NEURUN_BACKEND_CPU_COMMON} PRIVATE ${NEURUN_INCLUDE_DIR}) + +add_test(${TEST_NEURUN_BACKEND_CPU_COMMON} ${TEST_NEURUN_BACKEND_CPU_COMMON}) +install(TARGETS ${TEST_NEURUN_BACKEND_CPU_COMMON} DESTINATION unittest) diff --git a/runtime/neurun/backend/cpu_common/MemoryPlanner.cc b/runtime/neurun/backend/cpu_common/MemoryPlanner.cc new file mode 100644 index 000000000..19961362e --- /dev/null +++ b/runtime/neurun/backend/cpu_common/MemoryPlanner.cc @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlanner.h"
+#include "util/logging.h"
+#include <cassert>
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// Eagerly allocates one contiguous heap buffer of `capacity` bytes; the
+// planners above compute offsets into this single arena.
+Allocator::Allocator(uint32_t capacity)
+{
+  _base = nnfw::cpp14::make_unique<uint8_t[]>(capacity);
+
+  VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
+  VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
+}
+
+// Bump strategy: every claim is appended at the current end of the arena.
+// Offsets are never reused, so total capacity is the sum of all claims.
+void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+  assert(size != 0);
+
+  Block blk{_capacity, size};
+  _mem_plans[ind] = blk;
+  _capacity += size;
+
+  VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size
+                      << std::endl;
+}
+
+// Intentionally a no-op: bump allocation never reclaims space.
+void BumpPlanner::release(const ir::OperandIndex &ind)
+{
+  VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): "
+                      << "NOTHING does" << std::endl;
+}
+
+// There are some assumptions for claiming memory(== making a reservation for memory).
+// 1. About _claim_table(std::map).
+//  - The table's data structure is std::map so that it always sorts
+//    value(OperandIndex) by key(base_offset).
+//  - This claim() inserts key/value into _claim_table and the release() removes the key/value from
+//    _claim_table.
+//  - _claim_table shows the memory status at a certain point in time. Therefore,
+//    - If _claim_table has an offset and a certain size at a certain point in time,
+//      it means the place at the offset has been already claimed(== can't claim now. need to find
+//      someplace new).
+//    - If _claim_table doesn't have any element for an offset and a certain size at a certain
+//      point in time, it means the place at the offset can be claimed.
+// 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than
+//    the previous claim_base_offset.
+void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+  assert(size != 0);
+
+  // Find the right position for claiming: scan currently-claimed blocks in
+  // ascending offset order and take the first gap large enough for `size`.
+  uint32_t next_offset = 0;
+  for (auto &mem_claim : _claim_table)
+  {
+    auto claimed_base_offset = mem_claim.first;
+    auto claimed_size = _mem_plans[mem_claim.second].size;
+    if (next_offset + size <= claimed_base_offset)
+    {
+      break;
+    }
+    else
+    {
+      next_offset = claimed_base_offset + claimed_size;
+    }
+  }
+
+  // Now next_offset is set to the proper offset
+  _claim_table[next_offset] = ind;
+  _mem_plans[ind] = {next_offset, size};
+
+  VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]"
+                      << std::endl;
+
+  // Grow the arena high-water mark if this claim extends past it.
+  if (_capacity < next_offset + size)
+  {
+    _capacity = next_offset + size;
+  }
+}
+
+// Linear scan by operand index; asserts if the operand was never claimed or
+// was already released (programmer error).
+void FirstFitPlanner::release(const ir::OperandIndex &ind)
+{
+  for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
+  {
+    if (it->second == ind)
+    {
+      // offset/index/size are only consumed by the VERBOSE log below.
+      uint32_t offset = it->first;
+      uint32_t index = ind.value();
+      uint32_t size = _mem_plans[ind].size;
+
+      _claim_table.erase(it);
+
+      VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]"
+                          << std::endl;
+      return;
+    }
+  }
+  assert(!"Cannot release for given index. It has been not claimed or released already.");
+}
+
+WICPlanner::WICPlanner()
+    : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
+      _map_size_to_operands(), _claim_table()
+{
+  // DO NOTHING
+}
+
+// WIC claim only records liveness and interference; actual offsets are
+// assigned lazily by buildMemoryPlans() (triggered via memory_plans()).
+void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+  assert(size != 0);
+
+  _map_size_to_operands.insert({size, ind});
+  // Every currently-live operand interferes with the new one (and vice versa).
+  for (auto &live_operand : _live_operands)
+  {
+    _interference_graph[live_operand].insert(ind);
+    _interference_graph[ind].insert(live_operand);
+  }
+  _live_operands.insert(ind);
+
+  VERBOSE(WIC_PLANNER) << "claim(#" << ind.value() << "): [" << size << "sz]" << std::endl;
+}
+
+// Only ends the operand's live range; its plan entry (if any) is untouched.
+void WICPlanner::release(const ir::OperandIndex &ind)
+{
+  _live_operands.erase(ind);
+  VERBOSE(WIC_PLANNER) << "release(#" << ind.value() << ")" << std::endl;
+}
+
+/*
+ * Build memory plans using liveness and size of operands
+ * 1. Build inference graph at claim
+ *   - Two operands interfere if they have overlapped live range
+ * 2. Sort operands descending order of size
+ *   - Use std::multimap to sort operands
+ * 3. Allocate memory block for sorted operands
+ *   - Find free memory block which does not overlap with interfered operands
+ */
+void WICPlanner::buildMemoryPlans()
+{
+  // _map_size_to_operands is sorted by descending size (see header), so the
+  // largest operands get first pick of offsets.
+  for (auto &size_to_operand : _map_size_to_operands)
+  {
+    uint32_t size = size_to_operand.first;
+    ir::OperandIndex ind = size_to_operand.second;
+    VERBOSE(WIC_PLANNER) << "build_plan(#" << ind.value() << "): [" << size << "sz]" << std::endl;
+
+    // Find firstfit which does not interfere with live operands
+    uint32_t next_offset = 0;
+    if (_interference_graph.find(ind) != _interference_graph.end())
+    {
+      std::unordered_set<ir::OperandIndex> &interferences = _interference_graph.find(ind)->second;
+      for (auto &mem_claim : _claim_table)
+      {
+        // Only blocks of interfering operands constrain placement; operands
+        // with disjoint live ranges may share the same offsets.
+        if (interferences.find(mem_claim.second) != interferences.end())
+        {
+          auto claimed_base_offset = mem_claim.first;
+          auto claimed_size = _mem_plans[mem_claim.second].size;
+          VERBOSE(WIC_PLANNER) << "interfere (#" << mem_claim.second.value() << "): [+"
+                               << claimed_base_offset << ", " << claimed_size << "sz]" << std::endl;
+          if (next_offset + size <= claimed_base_offset)
+          {
+            break;
+          }
+          else if (next_offset < claimed_base_offset + claimed_size)
+          {
+            next_offset = claimed_base_offset + claimed_size;
+          }
+        }
+      }
+    }
+    else
+    {
+      VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
+    }
+
+    _claim_table.insert({next_offset, ind});
+    _mem_plans[ind] = {next_offset, size};
+    VERBOSE(WIC_PLANNER) << "alloc(#" << ind.value() << "): [+" << next_offset << ", " << size
+                         << "sz]" << std::endl;
+
+    if (_capacity < next_offset + size)
+    {
+      _capacity = next_offset + size;
+    }
+  }
+  _initialized = true;
+  // Working state is discarded after planning, so this can only run once;
+  // subsequent claim() calls would be ignored by the already-built plans.
+  _interference_graph.clear();
+  _map_size_to_operands.clear();
+  _claim_table.clear();
+}
+
+// Lazily builds the plans on first access.
+WICPlanner::MemoryPlans &WICPlanner::memory_plans()
+{
+  if (!_initialized)
+    buildMemoryPlans();
+  return _mem_plans;
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu_common/MemoryPlanner.h
b/runtime/neurun/backend/cpu_common/MemoryPlanner.h
new file mode 100644
index 000000000..c4f5e6a9e
--- /dev/null
+++ b/runtime/neurun/backend/cpu_common/MemoryPlanner.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file       MemoryPlanner.h
+ * @brief      This file contains Memory Planning related classes
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
+#define __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
+
+#include <map>
+#include <unordered_set>
+#include <cpp14/memory.h>
+
+#include "ir/OperandIndexMap.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+/**
+ * @brief Structure to have memory offset and size
+ * @note offset is a uint32_t while size is a size_t; large sizes would be
+ *       narrowed when mixed into offsets — presumably plans stay < 4GiB.
+ */
+struct Block
+{
+  uint32_t offset;
+  size_t size;
+};
+
+/**
+ * @brief Class to allocate memory
+ */
+class Allocator
+{
+public:
+  Allocator(uint32_t capacity);
+  /**
+   * @brief Get memory base pointer
+   * @return base pointer
+   */
+  uint8_t *base() const { return _base.get(); }
+  // Frees the underlying buffer; base() returns nullptr afterwards.
+  void release() { _base.reset(); }
+
+private:
+  std::unique_ptr<uint8_t[]> _base;
+};
+
+/**
+ * @brief Interface to plan memory
+ */
+struct IMemoryPlanner
+{
+  using MemoryPlans = ir::OperandIndexMap<Block>;
+
+  /**
+   * @brief Claim memory for operand
+   * @param[in] index The operand index
+   * @param[in] size The size of the memory
+   */
+  virtual void claim(const ir::OperandIndex &, size_t) = 0;
+  /**
+   * @brief Release memory for operand
+   * @param[in] index The operand index
+   */
+  virtual void release(const ir::OperandIndex &) = 0;
+  /**
+   * @brief Get capacity for memory planning
+   * @return The value of capacity
+   */
+  virtual uint32_t capacity() = 0;
+  /**
+   * @brief Get MemoryPlans
+   * @return MemoryPlans
+   */
+  virtual MemoryPlans &memory_plans() = 0;
+
+  virtual ~IMemoryPlanner() = default;
+};
+
+/**
+ * @brief Class to plan memory by bump way
+ */
+class BumpPlanner : public IMemoryPlanner
+{
+public:
+  /**
+   * @brief Claim memory for operand by bump way
+   * @param[in] index The operand index
+   * @param[in] size The size of the memory
+   */
+  void claim(const ir::OperandIndex &, size_t) override;
+  /**
+   * @brief Release memory for operand by bump way
+   * @param[in] index The operand index
+   * @note No-op: bump allocation never reuses released space
+   */
+  void release(const ir::OperandIndex &) override;
+  /**
+   * @brief Get capacity for memory planning
+   * @return The value of capacity
+   */
+  uint32_t capacity() override { return _capacity; }
+  /**
+   * @brief Get MemoryPlans
+   * @return MemoryPlans
+   */
+  MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+  uint32_t _capacity = 0;
+  MemoryPlans _mem_plans;
+};
+
+/**
+ * @brief Class to plan memory by firstfit way
+ */
+class FirstFitPlanner : public IMemoryPlanner
+{
+public:
+  /**
+   * @brief Claim memory for operand by firstfit way
+   * @param[in] index The operand index
+   * @param[in] size The size of the memory
+   */
+  void claim(const ir::OperandIndex &, size_t) override;
+  /**
+   * @brief Release memory for operand by firstfit way
+   * @param[in] index The operand index
+   */
+  void release(const ir::OperandIndex &) override;
+  /**
+   * @brief Get capacity for memory planning
+   * @return The value of capacity
+   */
+  uint32_t capacity() override { return _capacity; }
+  /**
+   * @brief Get MemoryPlans
+   * @return MemoryPlans
+   */
+  MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+  uint32_t _capacity = 0;
+  MemoryPlans _mem_plans;
+  // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
+  std::map<uint32_t, ir::OperandIndex> _claim_table;
+};
+
+/**
+ * @brief Class to plan memory by Weighted Interval Color algorithm
+ * @note claim()/release() only record liveness; actual offsets are assigned
+ *       lazily on the first call to capacity() or memory_plans().
+ */
+class WICPlanner : public IMemoryPlanner
+{
+public:
+  WICPlanner();
+
+  /**
+   * @brief Claim memory for operand by WIC algorithm
+   * @param[in] index The operand index
+   * @param[in] size The size of the memory
+   */
+  void claim(const ir::OperandIndex &, size_t) override;
+  /**
+   * @brief Release memory for operand by WIC algorithm
+   * @param[in] index The operand index
+   */
+  void release(const ir::OperandIndex &) override;
+  /**
+   * @brief Get capacity for memory planning
+   * @return The value of capacity
+   */
+  uint32_t capacity() override
+  {
+    // Triggers the one-time plan build if it has not happened yet.
+    if (!_initialized)
+      buildMemoryPlans();
+    return _capacity;
+  }
+  /**
+   * @brief Get MemoryPlans
+   * @return MemoryPlans
+   */
+  MemoryPlans &memory_plans() override;
+
+private:
+  void buildMemoryPlans();
+
+  bool _initialized;
+  uint32_t _capacity;
+  MemoryPlans _mem_plans;
+  std::unordered_set<ir::OperandIndex> _live_operands;
+  ir::OperandIndexMap<std::unordered_set<ir::OperandIndex>> _interference_graph;
+  // Sort operands by descending order of size
+  std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _map_size_to_operands;
+  std::multimap<uint32_t, ir::OperandIndex> _claim_table;
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
diff --git a/runtime/neurun/backend/cpu_common/MemoryPlanner.test.cc b/runtime/neurun/backend/cpu_common/MemoryPlanner.test.cc
new file mode 100644
index 000000000..b2be7db24
--- /dev/null
+++ b/runtime/neurun/backend/cpu_common/MemoryPlanner.test.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> + +#include "MemoryPlanner.h" +#include "ir/Index.h" + +TEST(Allocator, allocate_test) +{ + ::neurun::backend::cpu_common::Allocator allocator(1024); + ASSERT_NE(allocator.base(), nullptr); +} + +TEST(BumpPlanner, claim_test) +{ + ::neurun::backend::cpu_common::BumpPlanner planner; + + auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) { + neurun::ir::OperandIndex mem_idx(index); + planner.claim(mem_idx, size); + auto mem_blk = planner.memory_plans()[mem_idx]; + ASSERT_EQ(mem_blk.offset, expected_offset); + ASSERT_EQ(mem_blk.size, size); + }; + + claim(0, 10, 0); + claim(1, 20, 10); + claim(2, 30, 30); +} + +TEST(FirstFitPlanner, claim_release_test) +{ + ::neurun::backend::cpu_common::FirstFitPlanner planner; + + auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) { + neurun::ir::OperandIndex mem_idx(index); + planner.claim(mem_idx, size); + auto mem_blk = planner.memory_plans()[mem_idx]; + ASSERT_EQ(mem_blk.offset, expected_offset); + ASSERT_EQ(mem_blk.size, size); + }; + + auto release = [&planner](uint32_t index) { + neurun::ir::OperandIndex mem_idx(index); + planner.release(mem_idx); + }; + + // 0 CLAIM - 10 + claim(0, 10, 0); + + // 1 CLAIM - 20 + claim(1, 20, 10); + + // 2 CLAIM - 30 + claim(2, 30, 30); + + // 0 RELEASE - 10 + release(0); + + // 3 CLAIM - 20 + claim(3, 20, 60); + + // 4 CLAIM - 5 + 
claim(4, 5, 0); + + // 5 CLAIM - 10 + claim(5, 10, 80); + + // 6 CLAIM - 5 + claim(6, 5, 5); + + // 2 RELEASE - 30 + release(2); + + // 7 CLAIM - 35 + claim(7, 35, 90); + + // 8 CLAIM - 10 + claim(8, 10, 30); + + // 4 RELEASE - 5 + release(4); + + // 9 CLAIM - 10 + claim(9, 10, 40); + + // 10 CLAIM - 10 + claim(10, 10, 50); + + // 6 RELEASE + release(6); + + // 1 RELEASE + release(1); + + // 8 RELEASE + release(8); + + // 9 RELEASE + release(9); + + // 10 RELEASE + release(10); + + // 3 RELEASE + release(3); + + // 5 RELEASE + release(5); + + // 7 RELEASE + release(7); +} + +TEST(WICPlanner, claim_release_test) +{ + ::neurun::backend::cpu_common::WICPlanner planner; + + auto claim = [&planner](uint32_t index, size_t size) { + neurun::ir::OperandIndex mem_idx(index); + planner.claim(mem_idx, size); + }; + + auto release = [&planner](uint32_t index) { + neurun::ir::OperandIndex mem_idx(index); + planner.release(mem_idx); + }; + + auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) { + neurun::ir::OperandIndex mem_idx(index); + auto mem_blk = planner.memory_plans()[mem_idx]; + ASSERT_EQ(mem_blk.offset, expected_offset); + ASSERT_EQ(mem_blk.size, size); + }; + + auto capacity = [&planner](uint32_t expected_capacity) { + auto actual_capacity = planner.capacity(); + ASSERT_EQ(actual_capacity, expected_capacity); + }; + + claim(0, 20); + claim(1, 5); + release(0); + claim(2, 10); + release(1); + claim(3, 10); + release(2); + claim(4, 10); + release(3); + claim(5, 20); + release(4); + claim(6, 20); + release(5); + release(7); + + // VERIFY 0 - 0 + verify(0, 20, 0); + + // VERIFY 1 - 20 + verify(1, 5, 20); + + // VERIFY 2 - 0 + verify(2, 10, 0); + + // VERIFY 3 - 10 + verify(3, 10, 10); + + // VERIFY 4 - 20 + verify(4, 10, 20); + + // VERIFY 5 - 0 + verify(5, 20, 0); + + // VERIFY 6 - 20 + verify(6, 20, 20); + + // CAPACITY - 40 + capacity(40); +} diff --git a/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.cc 
b/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.cc
new file mode 100644
index 000000000..9da987aa4
--- /dev/null
+++ b/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlannerFactory.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// Meyers singleton: constructed on first use, thread-safe since C++11.
+MemoryPlannerFactory &MemoryPlannerFactory::get()
+{
+  static MemoryPlannerFactory instance;
+  return instance;
+}
+
+// Returns a newly heap-allocated planner selected by `key`; unknown keys fall
+// back to first-fit. Ownership transfers to the caller — presumably callers
+// wrap the result in a smart pointer (confirm at call sites).
+IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
+{
+  if (key == "FirstFit")
+  {
+    return new FirstFitPlanner;
+  }
+  else if (key == "Bump")
+  {
+    return new BumpPlanner;
+  }
+  else if (key == "WIC")
+  {
+    return new WICPlanner;
+  }
+  return new FirstFitPlanner; // Default Planner
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.h b/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.h
new file mode 100644
index 000000000..829600e27
--- /dev/null
+++ b/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
+#define __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
+
+#include "MemoryPlanner.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+/**
+ * @brief Singleton factory that maps a string key to a memory planner
+ *        ("FirstFit", "Bump", "WIC"; anything else yields the default)
+ */
+class MemoryPlannerFactory
+{
+public:
+  static MemoryPlannerFactory &get();
+
+private:
+  // Non-constructible from outside: access only through get().
+  MemoryPlannerFactory() = default;
+
+public:
+  // Returns a heap-allocated planner; the caller takes ownership.
+  IMemoryPlanner *create(const std::string &key);
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
diff --git a/runtime/neurun/backend/hi_perf_cpu/CMakeLists.txt b/runtime/neurun/backend/hi_perf_cpu/CMakeLists.txt
new file mode 100644
index 000000000..816edba5e
--- /dev/null
+++ b/runtime/neurun/backend/hi_perf_cpu/CMakeLists.txt
@@ -0,0 +1,44 @@
+set(LIB_NEURUN_BACKEND_HI_PERF_CPU neurun_backend_hi_perf)
+
+nnfw_find_package(NNPACK QUIET)
+
+option(BUILD_NEURUN_HI_PERF_CPU_BACKEND
+       "Build neurun HI_PERF_CPU backend"
+       ${NNPACK_FOUND} # Default value when there is no explicit user request
+)
+
+message(STATUS "Build neurun HI_PERF_CPU backend: ${BUILD_NEURUN_HI_PERF_CPU_BACKEND}")
+
+if(NOT BUILD_NEURUN_HI_PERF_CPU_BACKEND)
+  return()
+endif(NOT BUILD_NEURUN_HI_PERF_CPU_BACKEND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+file(GLOB_RECURSE TESTS "*.test.cc")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(${LIB_NEURUN_BACKEND_HI_PERF_CPU} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnfw_lib_misc)
+target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnfw_lib_cpp14) +target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE neurun_core) +target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnfw_common) +target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnpack pthreadpool cpuinfo) +target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnfw_coverage) +target_include_directories(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE ${NNPACK_INCLUDE_DIRS}) + +set_target_properties(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PROPERTIES OUTPUT_NAME backend_NNPACK) + +install(TARGETS ${LIB_NEURUN_BACKEND_HI_PERF_CPU} DESTINATION lib) + +# Unit Tests +set(TEST_NEURUN_BACKEND_HI_PERF_CPU test_neurun_backend_hi_perf) + +add_executable(${TEST_NEURUN_BACKEND_HI_PERF_CPU} ${TESTS}) + +target_link_libraries(${TEST_NEURUN_BACKEND_HI_PERF_CPU} ${LIB_NEURUN_BACKEND_HI_PERF_CPU}) +target_link_libraries(${TEST_NEURUN_BACKEND_HI_PERF_CPU} gtest gtest_main ${LIB_PTHREAD}) +target_link_libraries(${TEST_NEURUN_BACKEND_HI_PERF_CPU} nnpack) + +add_test(${TEST_NEURUN_BACKEND_HI_PERF_CPU} ${TEST_NEURUN_BACKEND_HI_PERF_CPU}) +install(TARGETS ${TEST_NEURUN_BACKEND_HI_PERF_CPU} DESTINATION unittest) diff --git a/runtime/neurun/backend/hi_perf_cpu/HighPerformanceBackend.test.cc b/runtime/neurun/backend/hi_perf_cpu/HighPerformanceBackend.test.cc new file mode 100644 index 000000000..625fe1c36 --- /dev/null +++ b/runtime/neurun/backend/hi_perf_cpu/HighPerformanceBackend.test.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> + +#include "nnpack.h" + +TEST(High_performance_backend, NNPACK_Test) +{ + // Check that it is possible to import + const enum nnp_status init_status = nnp_initialize(); + + // One of the allowed nnp status codes + ASSERT_GE(init_status, 0); + ASSERT_LE(init_status, 54); + + // If it is possible to test, test relu + if (init_status == nnp_status_success) + { + float in[] = {-1, 1, -1, 1}; + float out[4]; + nnp_relu_output(1, 4, in, out, 0, nullptr); + for (int i = 0; i < 4; i++) + { + ASSERT_EQ(out[i], in[i] >= 0 ? in[i] : 0); + } + } + nnp_deinitialize(); +} diff --git a/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.cc b/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.cc new file mode 100644 index 000000000..a13fe12b9 --- /dev/null +++ b/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "KernelGenerator.h" +// to force compilation diff --git a/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.h b/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.h new file mode 100644 index 000000000..3197995e1 --- /dev/null +++ b/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__ +#define __NEURUN_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__ + +#include <backend/IKernelGenerator.h> + +#include "ir/Operands.h" +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace hi_perf_cpu +{ + +class KernelGenerator : public IKernelGenerator +{ +public: + KernelGenerator(const Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder); + // TODO add more ops + +private: + const Operands &_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace hi_perf_cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__ diff --git a/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.cc b/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.cc new file mode 100644 index 000000000..e6ebf5f0b --- /dev/null +++ b/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TensorBuilder.h" +// to force compilation diff --git a/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.h b/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.h new file mode 100644 index 000000000..af879a41e --- /dev/null +++ b/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef NNFW_TENSORBUILDER_H +#define NNFW_TENSORBUILDER_H + +#include <unordered_map> + +#include <backend/ITensorBuilder.h> +#include "ir/OperandIndexMap.h" + +namespace neurun +{ +namespace backend +{ +namespace hi_perf_cpu +{ + +class TensorBuilder : public ITensorBuilder +{ +public: + TensorBuilder(); + +private: +}; + +} // namespace hi_perf_cpu +} // namespace backend +} // namespace neurun + +#endif // NNFW_TENSORBUILDER_H diff --git a/runtime/neurun/backend/srcn/Backend.h b/runtime/neurun/backend/srcn/Backend.h new file mode 100644 index 000000000..bc76a7ed2 --- /dev/null +++ b/runtime/neurun/backend/srcn/Backend.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __NEURUN_BACKEND_SRCN_BACKEND_H__
+#define __NEURUN_BACKEND_SRCN_BACKEND_H__
+
+#include <memory>
+#include <backend/Backend.h>
+#include <ir/Operands.h>
+
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ShapeFixer.h"
+#include "TensorRegister.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace srcn
+{
+
+/**
+ * @brief Entry point of the srcn backend: exposes its Config and wires up a
+ *        fresh BackendContext (tensor builder, constant initializer, kernel
+ *        generator, shape fixer, tensor register) per compilation.
+ */
+class Backend : public ::neurun::backend::Backend
+{
+public:
+  Backend() : _config{std::make_shared<Config>()} {}
+
+  std::shared_ptr<IConfig> config() const override { return _config; }
+
+  // Builds a new context; all components share the same tensor_builder so
+  // they operate on one set of backend tensors.
+  std::unique_ptr<BackendContext>
+  newContext(const ir::Operands &operands,
+             const std::shared_ptr<custom::IKernelBuilder> &kb) const override
+  {
+    auto tensor_builder = std::make_shared<TensorBuilder>();
+    return std::unique_ptr<BackendContext>{new BackendContext{
+        this, tensor_builder, std::make_shared<ConstantInitializer>(operands, tensor_builder),
+        std::make_shared<KernelGenerator>(operands, tensor_builder, kb),
+        std::make_shared<ShapeFixer>(operands, tensor_builder),
+        std::make_shared<TensorRegister>(operands, tensor_builder)}};
+  }
+
+private:
+  std::shared_ptr<IConfig> _config;
+};
+
+} // namespace srcn
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_SRCN_BACKEND_H__
diff --git a/runtime/neurun/backend/srcn/CMakeLists.txt b/runtime/neurun/backend/srcn/CMakeLists.txt
new file mode 100644
index 000000000..3b0440c8e
--- /dev/null
+++ b/runtime/neurun/backend/srcn/CMakeLists.txt
@@ -0,0 +1,21 @@
+if(NOT BUILD_SRCN_KERNEL)
+  message(STATUS "Skip building SRCN backend: SRCN kernel library is not build")
+  return()
+endif()
+
+set(LIB_NEURUN_BACKEND_SRCN neurun_backend_srcn)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_NEURUN_BACKEND_SRCN} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PUBLIC nnfw_lib_cpp14)
+target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE nnfw_lib_srcn)
+target_link_libraries(${LIB_NEURUN_BACKEND_SRCN}
PRIVATE neurun_core) +target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE ${LIB_NEURUN_BACKEND_CPU_COMMON}) +target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE nnfw_common) +target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE nnfw_coverage) + +set_target_properties(${LIB_NEURUN_BACKEND_SRCN} PROPERTIES OUTPUT_NAME backend_srcn) + +install(TARGETS ${LIB_NEURUN_BACKEND_SRCN} DESTINATION lib) diff --git a/runtime/neurun/backend/srcn/Config.cc b/runtime/neurun/backend/srcn/Config.cc new file mode 100644 index 000000000..6865657e7 --- /dev/null +++ b/runtime/neurun/backend/srcn/Config.cc @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Config.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +bool Config::initialize() { return true; } + +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/Config.h b/runtime/neurun/backend/srcn/Config.h new file mode 100644 index 000000000..efc77fde2 --- /dev/null +++ b/runtime/neurun/backend/srcn/Config.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_SRCN_CONFIG_H__ +#define __NEURUN_BACKEND_SRCN_CONFIG_H__ + +#include <backend/IConfig.h> + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "srcn"; } + bool initialize() override; + bool SupportPermutation() override { return false; } + bool SupportSubTensorAlloc() override + { + // NOTE srcn allocator cannot support subtensor allocation yet + return false; + } +}; + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_CONFIG_H__ diff --git a/runtime/neurun/backend/srcn/ConstantInitializer.cc b/runtime/neurun/backend/srcn/ConstantInitializer.cc new file mode 100644 index 000000000..f03628b1f --- /dev/null +++ b/runtime/neurun/backend/srcn/ConstantInitializer.cc @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConstantInitializer.h" + +#include "kernel/OperationUtils.h" + +namespace +{ + +template <typename T> +static void PermuteKernel(const neurun::ir::Operand &model_obj, + neurun::backend::operand::ITensor &obj, + const std::vector<int32_t> &permutation) +{ + const auto shape = model_obj.shape(); + auto base = reinterpret_cast<const T *>(model_obj.data().base()); + + assert(shape.rank() == 4); + assert(permutation.size() == 4); + assert(permutation[0] != permutation[1] && permutation[0] != permutation[2] && + permutation[0] != permutation[3]); + assert(permutation[1] != permutation[2] && permutation[1] != permutation[3]); + assert(permutation[2] != permutation[3]); + assert(permutation[0] < 4 && permutation[1] < 4 && permutation[2] < 4 && permutation[3] < 4); + + obj.access([&](::neurun::backend::operand::ITensor &tensor) { + if (permutation[0] == 0 && permutation[1] == 1 && permutation[2] == 2 && permutation[3] == 3) + { + memcpy(tensor.buffer(), base, shape.num_elements() * sizeof(T)); + } + else + { + const int32_t dim0 = shape.dim(0); + const int32_t dim1 = shape.dim(1); + const int32_t dim2 = shape.dim(2); + const int32_t dim3 = shape.dim(3); + for (auto i = 0; i < dim0; ++i) + { + for (auto j = 0; j < dim1; ++j) + { + for (auto k = 0; k < dim2; ++k) + { + for (auto l = 0; l < dim3; ++l) + { + Coordinates frontend_coords{i, j, k, l}; + Coordinates coords = frontend_coords; + coords.set(0, frontend_coords[permutation[0]]); + coords.set(1, frontend_coords[permutation[1]]); + coords.set(2, frontend_coords[permutation[2]]); + coords.set(3, frontend_coords[permutation[3]]); + T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords)); + T value = *(base + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3 + l); + *into = value; + } + } + } + } + } + }); +} +} + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +ConstantInitializer::ConstantInitializer(const ir::Operands &operands, + const 
std::shared_ptr<TensorBuilder> &tensor_builder) + : _operands{operands}, _tensor_builder{tensor_builder} +{ + // DO NOTHING +} + +void ConstantInitializer::registerPermuteKernelInitializer(const ir::OperandIndex &index, + const ir::Operand &obj, + const std::vector<int32_t> &permutation) +{ + // For only CONSTANTS + if (!obj.isConstant()) + return; + + VERBOSE(FillOperandData) << "[SRCN] Fill data for operand " << index.value() << std::endl; + + const auto type = obj.typeInfo().type(); + using ir::DataType; + using namespace std::placeholders; + + switch (type) + { + case DataType::FLOAT32: + _init_map[index] = std::bind(PermuteKernel<float>, _1, _2, permutation); + break; + case DataType::INT32: + _init_map[index] = std::bind(PermuteKernel<int32_t>, _1, _2, permutation); + break; + case DataType::UINT32: + _init_map[index] = std::bind(PermuteKernel<uint32_t>, _1, _2, permutation); + break; + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + _init_map[index] = std::bind(PermuteKernel<uint8_t>, _1, _2, permutation); + break; + case DataType::QUANT8_SYMM: + _init_map[index] = std::bind(PermuteKernel<int8_t>, _1, _2, permutation); + break; + default: + throw std::runtime_error("Not supported, yet"); + break; + } +} + +void ConstantInitializer::visit(const ir::operation::Conv2D &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + util::Coordinates permutation{0, 1, 2, 3}; + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = _tensor_builder->tensorAt(kernel_index)->layout(); + assert(frontend_layout == ir::Layout::NHWC || frontend_layout == ir::Layout::NCHW); + assert(backend_layout == ir::Layout::NHWC || backend_layout == ir::Layout::NCHW); + const auto frontend_filter_layout = + frontend_layout == ir::Layout::NHWC ? 
kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW; + const auto backend_filter_layout = + backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWIO : kernel::FilterLayout::OIHW; + registerPermuteKernelInitializer( + kernel_index, kernel_obj, + kernel::getFilterPermutation(frontend_filter_layout, backend_filter_layout)); +} + +void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) +{ + const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + util::Coordinates permutation{0, 1, 2, 3}; + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = _tensor_builder->tensorAt(kernel_index)->layout(); + assert(frontend_layout == ir::Layout::NHWC || frontend_layout == ir::Layout::NCHW); + assert(backend_layout == ir::Layout::NHWC || backend_layout == ir::Layout::NCHW); + const auto frontend_filter_layout = + frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW; + const auto backend_filter_layout = + backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWIO : kernel::FilterLayout::OIHW; + registerPermuteKernelInitializer( + kernel_index, kernel_obj, + kernel::getFilterPermutation(frontend_filter_layout, backend_filter_layout)); +} + +void ConstantInitializer::visit(const ir::operation::TransposeConv &node) +{ + // NOTE The srcn deconvolution layer takes a HWOI layout as kernel filter even though image layout + // is NHWC. + // This policy is the same with the tensorflow policy. + // So for using srcn library, we need to change kernel layout to HWOI from OHWI or OIHW in + // this case. 
+ // Also the srcn deconvolution layer takes a OIHW layout as kernel filter if image's layout + // is NCHW + const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + const auto frontend_layout = _current_subg_layout; + const auto backend_layout = _tensor_builder->tensorAt(kernel_index)->layout(); + assert(frontend_layout == ir::Layout::NHWC || frontend_layout == ir::Layout::NCHW); + assert(backend_layout == ir::Layout::NHWC || backend_layout == ir::Layout::NCHW); + const auto frontend_filter_layout = + frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW; + const auto backend_filter_layout = + backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWOI : kernel::FilterLayout::IOHW; + registerPermuteKernelInitializer( + kernel_index, kernel_obj, + kernel::getFilterPermutation(frontend_filter_layout, backend_filter_layout)); +} + +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/ConstantInitializer.h b/runtime/neurun/backend/srcn/ConstantInitializer.h new file mode 100644 index 000000000..eadfe2ae1 --- /dev/null +++ b/runtime/neurun/backend/srcn/ConstantInitializer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_COMPILER_SRCN_CONSTANT_INITIALIZER_H__ +#define __NEURUN_COMPILER_SRCN_CONSTANT_INITIALIZER_H__ + +#include <backend/IConstantInitializer.h> +#include <ir/Operands.h> +#include "TensorBuilder.h" +#include <util/Coordinates.h> + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +class ConstantInitializer : public IConstantInitializer +{ +public: + ConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder); + +public: + void registerPermuteKernelInitializer(const ir::OperandIndex &index, const ir::Operand &obj, + const std::vector<int32_t> &permutation); + +public: + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::TransposeConv &) override; + +private: + const ir::Operands &operands() const override { return _operands; } + std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } + +private: + const ir::Operands &_operands; + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_COMPILER_SRCN_CONSTANT_INITIALIZER_H__ diff --git a/runtime/neurun/backend/srcn/Convert.cc b/runtime/neurun/backend/srcn/Convert.cc new file mode 100644 index 000000000..1d80b2c7c --- /dev/null +++ b/runtime/neurun/backend/srcn/Convert.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Convert.h" + +#include <cassert> +#include <cpp14/memory.h> +#include <ir/DataType.h> +#include "Swizzle.h" +#include <vector> + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +ir::Shape asKernelShape(const ir::Shape &shape, kernel::FilterLayout frontend_layout, + kernel::FilterLayout backend_layout) +{ + assert(shape.rank() == 4); + if (frontend_layout == backend_layout) + { + return ir::Shape{shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3)}; + } + + const auto permutation = getFilterPermutation(frontend_layout, backend_layout); + if (permutation.size() == 0) + { + throw std::runtime_error("Not supported FilterLayout"); + } + return ir::Shape{shape.dim(permutation[0]), shape.dim(permutation[1]), shape.dim(permutation[2]), + shape.dim(permutation[3])}; +} + +ir::Shape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout, + ir::Layout backend_layout) +{ + const uint32_t rank = shape.rank(); + + ir::Shape ret(rank); + for (uint32_t axis = 0; axis < rank; ++axis) + { + const auto ncnn_axis = ToNCNNAxis(rank, axis, frontend_layout, backend_layout); + ret.dim(ncnn_axis) = shape.dim(axis); + } + + return ret; +} + +ir::OperandInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo, + ir::Layout frontend_layout, ir::Layout backend_layout) +{ + ir::OperandInfo info(asTensorShape(shape, frontend_layout, backend_layout), typeInfo); + + return info; +} + +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/Convert.h b/runtime/neurun/backend/srcn/Convert.h new file mode 100644 index 000000000..64be46e60 --- /dev/null +++ b/runtime/neurun/backend/srcn/Convert.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_SRCN_CONVERT_H__ +#define __NEURUN_BACKEND_SRCN_CONVERT_H__ + +#include "kernel/OperationUtils.h" +#include <ir/Layout.h> +#include <ir/Shape.h> +#include <ir/TypeInfo.h> +#include <ir/OperandInfo.h> + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +ir::Shape asKernelShape(const ir::Shape &shape, kernel::FilterLayout frontend_layout, + kernel::FilterLayout backend_layout); + +ir::Shape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout, + ir::Layout backend_layout); + +ir::OperandInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo, + ir::Layout frontend_layout, ir::Layout backend_layout); + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_CONVERT_H__ diff --git a/runtime/neurun/backend/srcn/KernelGenerator.cc b/runtime/neurun/backend/srcn/KernelGenerator.cc new file mode 100644 index 000000000..c096f9230 --- /dev/null +++ b/runtime/neurun/backend/srcn/KernelGenerator.cc @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KernelGenerator.h" + +#include <stdexcept> + +#include "cpp14/memory.h" +#include "util/Padding.h" +#include "kernel/ConvolutionLayer.h" +#include "kernel/DepthwiseConvolutionLayer.h" +#include "kernel/InstanceNormLayer.h" +#include "kernel/TransposeConvLayer.h" +#include "kernel/AddLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> + +#include "util/logging.h" + +#include "util/Utils.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +KernelGenerator::KernelGenerator(const ir::Operands &operand_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<custom::IKernelBuilder> &kb) + : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_builder(kb), + _current_subg_layout(ir::Layout::UNKNOWN) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + _current_subg_layout = op_seq.getLayout(); + for (const auto &e : op_seq.operations()) + { + const auto &node = *(e.node); + _tensor_builder->preVisit(node); + node.accept(*this); + _tensor_builder->postVisit(node); + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = 
_ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] if NHWC and [depth_out, + // depth_in, kernel_height, kernel_width] if NCHW. + const auto &ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout); + const auto ker_height = ker_shape.H; + const auto ker_width = ker_shape.W; + const auto stride = node.param().stride; + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const int has_padding = padding.left + padding.right + padding.top + padding.bottom; + + auto ofm_alloc = _tensor_builder->at(ofm_index); + auto ifm_alloc = _tensor_builder->at(ifm_index); + auto ker_alloc = _tensor_builder->at(ker_index); + auto bias_alloc = _tensor_builder->at(bias_index); + const auto backend_layout = ifm_alloc->layout(); + assert(backend_layout == ofm_alloc->layout()); + + const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ofm_index), _current_subg_layout, backend_layout); + const auto ifm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ifm_index), _current_subg_layout, backend_layout); + const auto ker_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ker_index), _current_subg_layout, backend_layout); + const auto bias_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(bias_index), _current_subg_layout, backend_layout); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::ConvolutionLayer>(); + + // TODO Support activation + fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr, + bias_alloc->buffer(), bias_backend_descr, has_padding, padding.left, padding.right, + padding.top, padding.bottom, stride.horizontal, stride.vertical, + /*activation,*/ ofm_alloc->buffer(), ofm_backend_descr, backend_layout); + + 
_execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::InstanceNorm &node) +{ + using ir::operation::InstanceNorm; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(InstanceNorm::Input::INPUT)}; + const auto gamma_index{node.getInputs().at(InstanceNorm::Input::GAMMA)}; + const auto beta_index{node.getInputs().at(InstanceNorm::Input::BETA)}; + + const auto epsilon = node.param().epsilon; + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index); + auto ifm_alloc = _tensor_builder->at(ifm_index); + auto gamma_alloc = _tensor_builder->at(gamma_index); + auto beta_alloc = _tensor_builder->at(beta_index); + + const auto backend_layout = ofm_alloc->layout(); + + const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ofm_index), _current_subg_layout, backend_layout); + const auto ifm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ifm_index), _current_subg_layout, backend_layout); + const auto gamma_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(gamma_index), _current_subg_layout, backend_layout); + const auto beta_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(beta_index), _current_subg_layout, backend_layout); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::InstanceNormLayer>(); + + fn->configure(ifm_alloc->buffer(), ifm_backend_descr, gamma_alloc->buffer(), gamma_backend_descr, + beta_alloc->buffer(), beta_backend_descr, ofm_alloc->buffer(), ofm_backend_descr, + epsilon, activation, backend_layout); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) +{ + using ir::operation::DepthwiseConv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto 
ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] if NHWC and [depth_out, + // depth_in, kernel_height, kernel_width] if NCHW. + const auto &ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout); + const auto ker_height = ker_shape.H; + const auto ker_width = ker_shape.W; + const auto stride = node.param().stride; + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const int padding_type = padding.left + padding.right + padding.top + padding.bottom; + + auto ofm_alloc = _tensor_builder->at(ofm_index); + auto ifm_alloc = _tensor_builder->at(ifm_index); + auto ker_alloc = _tensor_builder->at(ker_index); + auto bias_alloc = _tensor_builder->at(bias_index); + const auto backend_layout = ifm_alloc->layout(); + assert(backend_layout == ofm_alloc->layout()); + + const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ofm_index), _current_subg_layout, backend_layout); + const auto ifm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ifm_index), _current_subg_layout, backend_layout); + const auto ker_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ker_index), _current_subg_layout, backend_layout); + const auto bias_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(bias_index), _current_subg_layout, backend_layout); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::DepthwiseConvolutionLayer>(); + + // TODO Support 
activation + fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr, + bias_alloc->buffer(), bias_backend_descr, padding_type, padding.left, padding.right, + padding.top, padding.bottom, stride.horizontal, stride.vertical, + /*activation,*/ ofm_alloc->buffer(), ofm_backend_descr, backend_layout); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::TransposeConv &node) +{ + using ir::operation::TransposeConv; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(TransposeConv::Input::INPUT)}; + const auto ker_index{node.getInputs().at(TransposeConv::Input::KERNEL)}; + const auto output_shape_index{node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] if NHWC and [depth_out, + // depth_in, kernel_height, kernel_width] if NCHW. 
+ const auto &ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout); + const auto ker_height = ker_shape.H; + const auto ker_width = ker_shape.W; + const auto stride = node.param().stride; + const int padding_type = (node.param().padding.type == ir::PaddingType::SAME); + const auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, + stride, ker_width, ker_height); + + auto ofm_alloc = _tensor_builder->at(ofm_index); + auto ifm_alloc = _tensor_builder->at(ifm_index); + auto ker_alloc = _tensor_builder->at(ker_index); + const auto backend_layout = ofm_alloc->layout(); + assert(backend_layout == ifm_alloc->layout()); + + const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ofm_index), _current_subg_layout, backend_layout); + const auto ifm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ifm_index), _current_subg_layout, backend_layout); + const auto ker_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ker_index), _current_subg_layout, backend_layout); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::TransposeConvLayer>(); + + fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr, + padding_type, padding.left, padding.right, padding.top, padding.bottom, + stride.horizontal, stride.vertical, ofm_alloc->buffer(), ofm_backend_descr, + backend_layout); + + _execution_builder->append(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Add &node) +{ + using ir::operation::Add; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(Add::Input::LHS)}; + const auto rhs_index{node.getInputs().at(Add::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = 
_tensor_builder->at(rhs_index).get(); + + const auto backend_layout = ofm_alloc->layout(); + assert(backend_layout == lhs_alloc->layout() && backend_layout == rhs_alloc->layout()); + + const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(ofm_index), _current_subg_layout, backend_layout); + const auto lhs_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(lhs_index), _current_subg_layout, backend_layout); + const auto rhs_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor( + _ctx.at(rhs_index), _current_subg_layout, backend_layout); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::AddLayer>(); + + fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr, + activation, ofm_alloc->buffer(), ofm_backend_descr, backend_layout); + + _execution_builder->append(std::move(fn)); +} + +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/KernelGenerator.h b/runtime/neurun/backend/srcn/KernelGenerator.h new file mode 100644 index 000000000..879aeaf5b --- /dev/null +++ b/runtime/neurun/backend/srcn/KernelGenerator.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_SRCN_KERNEL_GENERATOR_H__ +#define __NEURUN_BACKEND_SRCN_KERNEL_GENERATOR_H__ + +#include "backend/IKernelGenerator.h" +#include "ir/Operands.h" +#include "operand/Tensor.h" +#include "backend/CustomKernelBuilder.h" +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +class KernelGenerator : public IKernelGenerator +{ +public: + KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<custom::IKernelBuilder> &kb); + + using IKernelGenerator::visit; + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::InstanceNorm &) override; + void visit(const ir::operation::TransposeConv &) override; + void visit(const ir::operation::Add &) override; + +private: + const ir::Operands &_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<custom::IKernelBuilder> _kernel_builder; + ir::Layout _current_subg_layout; +}; + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_KERNEL_GENERATOR_H__ diff --git a/runtime/neurun/backend/srcn/MemoryManager.cc b/runtime/neurun/backend/srcn/MemoryManager.cc new file mode 100644 index 000000000..aa07ab168 --- /dev/null +++ b/runtime/neurun/backend/srcn/MemoryManager.cc @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MemoryManager.h" + +#include <cassert> + +#include <MemoryPlannerFactory.h> +#include "util/logging.h" +#include "util/ConfigSource.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()} +{ + // DO NOTHING +} + +MemoryManager::MemoryManager(const std::string planner_id) + : _mem_planner{createMemoryPlanner(planner_id)} +{ + // DO NOTHING +} + +cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner() +{ + auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER); + return cpu_common::MemoryPlannerFactory::get().create(planner_id); +} + +cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id) +{ + return cpu_common::MemoryPlannerFactory::get().create(planner_id); +} + +void MemoryManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout layout) +{ + auto tensor = std::make_shared<operand::Tensor>(info, layout); + _tensors[ind] = tensor; +} + +void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) +{ + _mem_planner->claim(ind, size); +} + +void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); } + +void MemoryManager::allocate(void) +{ + _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity()); + assert(_mem_alloc->base()); + + for (auto &mem_plan : _mem_planner->memory_plans()) + { + auto ind = mem_plan.first; + auto mem_blk = mem_plan.second; + + uint8_t *buffer = 
_mem_alloc->base() + mem_blk.offset; + auto tensor = _tensors[ind]; + tensor->setBuffer(buffer); + + VERBOSE(CPU_MEMORYMANAGER) << "TENSOR(#" << ind.value() << "): " << static_cast<void *>(buffer) + << std::endl; + + // If we do not make tensor here currently, kernel generation would cause segmentation fault. + // See also : Comments in `allocate` method. + } +} + +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/MemoryManager.h b/runtime/neurun/backend/srcn/MemoryManager.h new file mode 100644 index 000000000..05fa07622 --- /dev/null +++ b/runtime/neurun/backend/srcn/MemoryManager.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_SRCN_MEMORY_MANAGER_H__ +#define __NEURUN_BACKEND_SRCN_MEMORY_MANAGER_H__ + +#include "backend/IMemoryManager.h" +#include <MemoryPlanner.h> +#include "operand/Tensor.h" +#include "ir/OperandIndexMap.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +class MemoryManager : public backend::IMemoryManager +{ +public: + MemoryManager(); + MemoryManager(const std::string); + virtual ~MemoryManager() = default; + + void allocate(void) override; + void deallocate(void) override { _mem_alloc->release(); } + + void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout layout); + void claimPlan(const ir::OperandIndex &ind, uint32_t size); + void releasePlan(const ir::OperandIndex &ind); + + ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &tensors(void) { return _tensors; } + +private: + cpu_common::IMemoryPlanner *createMemoryPlanner(); + cpu_common::IMemoryPlanner *createMemoryPlanner(std::string); + +private: + ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> _tensors; + ir::OperandIndexMap<cpu_common::Block> _tensor_mem_map; + std::shared_ptr<cpu_common::IMemoryPlanner> _mem_planner; + std::shared_ptr<cpu_common::Allocator> _mem_alloc; +}; + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_MEMORY_MANAGER_H__ diff --git a/runtime/neurun/backend/srcn/PluginClassesAllocator.cc b/runtime/neurun/backend/srcn/PluginClassesAllocator.cc new file mode 100644 index 000000000..9efc6aaaa --- /dev/null +++ b/runtime/neurun/backend/srcn/PluginClassesAllocator.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <util/logging.h> + +#include "Backend.h" + +extern "C" { +neurun::backend::Backend *neurun_backend_create() +{ + VERBOSE(neurun_backend_create) << "'srcn' loaded\n"; + return new neurun::backend::srcn::Backend; +} + +void neurun_backend_destroy(neurun::backend::Backend *backend) +{ + VERBOSE(neurun_backend_create) << "'srcn' unloaded\n"; + delete backend; +} +} diff --git a/runtime/neurun/backend/srcn/ShapeFixer.cc b/runtime/neurun/backend/srcn/ShapeFixer.cc new file mode 100644 index 000000000..0ef190f28 --- /dev/null +++ b/runtime/neurun/backend/srcn/ShapeFixer.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ShapeFixer.h" + +#include <stdexcept> + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +ShapeFixer::ShapeFixer(const ir::Operands &operand_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _ctx(operand_ctx), _tensor_builder(tensor_builder) +{ + assert(tensor_builder); +} + +void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::InstanceNorm &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::TransposeConv &) { /* DO NOTHING */} + +void ShapeFixer::visit(const ir::operation::Add &) { /* DO NOTHING */} + +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/ShapeFixer.h b/runtime/neurun/backend/srcn/ShapeFixer.h new file mode 100644 index 000000000..7da1ae731 --- /dev/null +++ b/runtime/neurun/backend/srcn/ShapeFixer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_SRCN_SHAPE_FIXER_H__ +#define __NEURUN_BACKEND_SRCN_SHAPE_FIXER_H__ + +#include <backend/IShapeFixer.h> + +#include "ir/Operands.h" +#include "operand/Tensor.h" +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +class ShapeFixer : public IShapeFixer +{ +public: + ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder); + + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::InstanceNorm &) override; + void visit(const ir::operation::TransposeConv &) override; + void visit(const ir::operation::Add &) override; + +private: + const ir::Operands &_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_SHAPE_FIXER_H__ diff --git a/runtime/neurun/backend/srcn/Swizzle.h b/runtime/neurun/backend/srcn/Swizzle.h new file mode 100644 index 000000000..d1f922367 --- /dev/null +++ b/runtime/neurun/backend/srcn/Swizzle.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __NEURUN_BACKEND_SRCN_SWIZZLE_H__
+#define __NEURUN_BACKEND_SRCN_SWIZZLE_H__
+
+#include <cassert>
+#include <ir/Layout.h>
+
+namespace neurun
+{
+namespace backend
+{
+namespace srcn
+{
+
+// Convert axis in ncnn order
+// Translates an axis index from `org_layout` into `ncnn_layout` for tensors
+// of rank >= 4 (NHWC <-> NCHW only). For rank < 4, matching layouts, or
+// UNKNOWN layouts the axis is returned unchanged. Only the H/W/C axes move;
+// the batch axis (0) is the same in both layouts and falls through.
+// @param rank        Rank of the tensor; must be greater than `axis`
+// @param axis        Axis index expressed in `org_layout`
+// @param org_layout  Layout the axis is currently expressed in
+// @param ncnn_layout Layout to convert the axis into
+// @return            Equivalent axis index in `ncnn_layout`
+inline uint32_t ToNCNNAxis(uint32_t rank, uint32_t axis,
+                           const ir::Layout org_layout = ir::Layout::UNKNOWN,
+                           const ir::Layout ncnn_layout = ir::Layout::UNKNOWN)
+{
+  assert(rank > axis);
+
+  if (rank >= 4 && org_layout == ir::Layout::NHWC && ncnn_layout == ir::Layout::NCHW)
+  {
+    // NHWC -> NCHW
+    // DEPTH
+    if (axis == 3)
+    {
+      return 1;
+    }
+    // WIDTH
+    if (axis == 2)
+    {
+      return 3;
+    }
+    // HEIGHT
+    if (axis == 1)
+    {
+      return 2;
+    }
+  }
+
+  if (rank >= 4 && org_layout == ir::Layout::NCHW && ncnn_layout == ir::Layout::NHWC)
+  {
+    // NCHW -> NHWC
+    // WIDTH
+    if (axis == 3)
+    {
+      return 2;
+    }
+    // HEIGHT
+    if (axis == 2)
+    {
+      return 1;
+    }
+    // DEPTH
+    if (axis == 1)
+    {
+      return 3;
+    }
+  }
+
+  return axis;
+}
+
+} // namespace srcn
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_SRCN_SWIZZLE_H__
diff --git a/runtime/neurun/backend/srcn/TensorBuilder.cc b/runtime/neurun/backend/srcn/TensorBuilder.cc
new file mode 100644
index 000000000..5ac25c33e
--- /dev/null
+++ b/runtime/neurun/backend/srcn/TensorBuilder.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "TensorBuilder.h" + +#include <cassert> +#include "Convert.h" +#include "util/logging.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +TensorBuilder::TensorBuilder() : _tensor_mgr{new TensorManager()} +{ + // DO NOTHING +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, + const ir::OperandInfo &tensor_info, + ir::Layout backend_layout, bool as_const) +{ + _tensor_info_map.emplace(ind, tensor_info); + _tensor_layout_map.emplace(ind, backend_layout); + + if (as_const) + _constants.append(ind); +} + +void TensorBuilder::registerSubTensorInfo(const ir::OperandIndex &, const compiler::SubTensorInfo &) +{ + // Not supported yet + assert(false); +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto &tensor_info = _tensor_info_map.at(ind); + const auto size = tensor_info.total_size(); + const auto &backend_layout = _tensor_layout_map.at(ind); + _tensor_mgr->buildTensor(ind, tensor_info, backend_layout, _constants.contains(ind)); + _tensor_mgr->claimPlan(ind, size); +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { _tensor_mgr->releasePlan(ind); } + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) +{ + _tensor_mgr->allocateConsts(); + _tensor_mgr->allocateNonconsts(); +} + +void TensorBuilder::allocateConsts() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because SRCN kernels require `ITensor`s to be allocated before Kernel Generation. +} + +void TensorBuilder::allocateNonconsts() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because SRCN kernels require `ITensor`s to be allocated before Kernel Generation. 
+} + +std::shared_ptr<::neurun::backend::operand::ITensor> +TensorBuilder::tensorAt(const ir::OperandIndex &ind) +{ + return _tensor_mgr->at(ind); +} + +void TensorBuilder::iterate(const IterateFunction &fn) { _tensor_mgr->iterate(fn); } + +std::shared_ptr<operand::Tensor> TensorBuilder::at(const ir::OperandIndex &ind) +{ + return _tensor_mgr->at(ind); +} + +std::unique_ptr<ITensorManager> TensorBuilder::releaseTensorManager(void) +{ + return std::move(_tensor_mgr); +} + +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/TensorBuilder.h b/runtime/neurun/backend/srcn/TensorBuilder.h new file mode 100644 index 000000000..38bd6dd89 --- /dev/null +++ b/runtime/neurun/backend/srcn/TensorBuilder.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_SRCN_TENSOR_BUILDER_H__ +#define __NEURUN_BACKEND_SRCN_TENSOR_BUILDER_H__ + +#include <unordered_map> + +#include <backend/ITensorBuilder.h> +#include "operand/Tensor.h" +#include "ir/OperandIndexMap.h" +#include "TensorManager.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +class TensorBuilder : public ITensorBuilder +{ +public: + TensorBuilder(); + + /** + * @brief Register tensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout, bool as_const) override; + /** + * @brief Register subtensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Tensor information + */ + void registerSubTensorInfo(const ir::OperandIndex &ind, + const compiler::SubTensorInfo &info) override; + + void notifyFirstUse(const ir::OperandIndex &) override; + void notifyLastUse(const ir::OperandIndex &) override; + + bool isRegistered(const ir::OperandIndex &) const override; + + void prepare(void) override; + void allocateConsts() override; + void allocateNonconsts() override; + void postFunctionPrepare() override { /* DO NOTHING */} + void finalize() override { /* DO NOTHING */} + + std::shared_ptr<::neurun::backend::operand::ITensor> + tensorAt(const ir::OperandIndex &ind) override; + + void iterate(const IterateFunction &fn) override; + + void preVisit(const ir::Operation &) override { /* DO NOTHING */} + void postVisit(const ir::Operation &) override { /* DO NOTHING */} + + std::unique_ptr<ITensorManager> releaseTensorManager(void) override; + + std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind); + +private: + std::unique_ptr<TensorManager> _tensor_mgr; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; + ir::OperandIndexMap<ir::Layout> _tensor_layout_map; 
+ ir::OperandIndexSequence _constants; +}; + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_TENSOR_BUILDER_H__ diff --git a/runtime/neurun/backend/srcn/TensorManager.cc b/runtime/neurun/backend/srcn/TensorManager.cc new file mode 100644 index 000000000..717f1bbbc --- /dev/null +++ b/runtime/neurun/backend/srcn/TensorManager.cc @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "TensorManager.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace srcn
+{
+
+// TensorManager routes each tensor to one of two MemoryManagers: constants
+// use a "Bump" planner, non-constants use the configuration-selected planner.
+// _ind_to_mgr records which manager owns each operand so subsequent calls
+// (claim/release/at) can be forwarded without re-deciding.
+TensorManager::TensorManager()
+    : _const_mgr{new MemoryManager("Bump")}, _nonconst_mgr{new MemoryManager()}
+{
+  // DO NOTHING
+}
+
+void TensorManager::allocateConsts(void) { _const_mgr->allocate(); }
+
+void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); }
+
+void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
+
+void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+// Build the tensor in the proper manager and remember the routing.
+// Each operand index may be built at most once (checked by the assert).
+void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+                                ir::Layout layout, bool as_const)
+{
+  assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
+  if (as_const)
+  {
+    _const_mgr->buildTensor(ind, tensor_info, layout);
+    _ind_to_mgr.insert({ind, *_const_mgr});
+  }
+  else
+  {
+    _nonconst_mgr->buildTensor(ind, tensor_info, layout);
+    _ind_to_mgr.insert({ind, *_nonconst_mgr});
+  }
+}
+
+void TensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+  _ind_to_mgr.at(ind).claimPlan(ind, size);
+}
+
+void TensorManager::releasePlan(const ir::OperandIndex &ind)
+{
+  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+  _ind_to_mgr.at(ind).releasePlan(ind);
+}
+
+// Look up a tensor; the operand must have been built via buildTensor.
+std::shared_ptr<operand::Tensor> TensorManager::at(const ir::OperandIndex &ind)
+{
+  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+  return _ind_to_mgr.at(ind).tensors().at(ind);
+}
+
+ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &TensorManager::constTensors(void)
+{
+  return _const_mgr->tensors();
+}
+
+ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &TensorManager::nonconstTensors(void)
+{
+  return _nonconst_mgr->tensors();
+}
+
+// Apply `fn` to every operand index, non-constants first, then constants.
+void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+  // Fix: iterate by const reference. The previous by-value loop copied each
+  // map entry — including a shared_ptr (atomic refcount inc/dec per element)
+  // — just to read the key.
+  for (const auto &it : _nonconst_mgr->tensors())
+    fn(it.first);
+
+  for (const auto &it : _const_mgr->tensors())
+    fn(it.first);
+}
+
+} //
namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/TensorManager.h b/runtime/neurun/backend/srcn/TensorManager.h new file mode 100644 index 000000000..d4390d80c --- /dev/null +++ b/runtime/neurun/backend/srcn/TensorManager.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_SRCN_TENSOR_MANAGER_H__ +#define __NEURUN_BACKEND_SRCN_TENSOR_MANAGER_H__ + +#include "backend/ITensorManager.h" +#include "MemoryManager.h" +#include "ir/OperandIndexMap.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +class TensorManager : public backend::ITensorManager +{ +public: + TensorManager(); + virtual ~TensorManager() = default; + + void allocateConsts(void) override; + void allocateNonconsts(void) override; + void deallocateConsts(void) override; + void deallocateNonconsts(void) override; + + void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, + ir::Layout layout, bool as_const); + + void claimPlan(const ir::OperandIndex &ind, uint32_t size); + void releasePlan(const ir::OperandIndex &ind); + + std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind); + + ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &constTensors(void); + ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &nonconstTensors(void); + + void iterate(const 
std::function<void(const ir::OperandIndex &)> &fn); + +private: + std::unique_ptr<MemoryManager> _const_mgr; + std::unique_ptr<MemoryManager> _nonconst_mgr; + ir::OperandIndexMap<MemoryManager &> _ind_to_mgr; +}; + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_TENSOR_MANAGER_H__ diff --git a/runtime/neurun/backend/srcn/TensorRegister.cc b/runtime/neurun/backend/srcn/TensorRegister.cc new file mode 100644 index 000000000..8c2f59aef --- /dev/null +++ b/runtime/neurun/backend/srcn/TensorRegister.cc @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TensorRegister.h" + +#include "Convert.h" +#include "kernel/OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +TensorRegister::TensorRegister(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder) + : _operands{operands}, _tensor_builder{tensor_builder} +{ + assert(tensor_builder != nullptr); +} + +void TensorRegister::visit(const ir::operation::Conv2D &node) +{ + // General cases + defaultRegisterTensorInfo(node.getInputs().at(ir::operation::Conv2D::INPUT)); + defaultRegisterTensorInfo(node.getInputs().at(ir::operation::Conv2D::BIAS)); + defaultRegisterTensorInfo(node.getOutputs().at(0)); + + // Special case + const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + + const auto frontend_layout = frontendLayout(); + assert(frontend_layout == ir::Layout::NCHW || frontend_layout == ir::Layout::NHWC); + const auto frontend_filter_layout = + frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW; + const auto backend_layout = backendLayout(kernel_index); + assert(backend_layout == ir::Layout::NCHW || backend_layout == ir::Layout::NHWC); + const auto backend_filter_layout = + backend_layout == ir::Layout::NHWC ? 
kernel::FilterLayout::HWIO : kernel::FilterLayout::OIHW; + + ir::OperandInfo backend_info{ + asKernelShape(kernel_obj.shape(), frontend_filter_layout, backend_filter_layout), + kernel_obj.info().typeInfo()}; + _tensor_builder->registerTensorInfo(kernel_index, backend_info, backend_layout, + kernel_obj.isConstant()); +} + +void TensorRegister::visit(const ir::operation::DepthwiseConv2D &node) +{ + // General cases + defaultRegisterTensorInfo(node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT)); + defaultRegisterTensorInfo(node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS)); + defaultRegisterTensorInfo(node.getOutputs().at(0)); + + // Special case + const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + + const auto frontend_layout = frontendLayout(); + assert(frontend_layout == ir::Layout::NCHW || frontend_layout == ir::Layout::NHWC); + const auto frontend_filter_layout = + frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW; + const auto backend_layout = backendLayout(kernel_index); + assert(backend_layout == ir::Layout::NCHW || backend_layout == ir::Layout::NHWC); + const auto backend_filter_layout = + backend_layout == ir::Layout::NHWC ? 
kernel::FilterLayout::HWIO : kernel::FilterLayout::OIHW; + + ir::OperandInfo backend_info{ + asKernelShape(kernel_obj.shape(), frontend_filter_layout, backend_filter_layout), + kernel_obj.info().typeInfo()}; + _tensor_builder->registerTensorInfo(kernel_index, backend_info, backend_layout, + kernel_obj.isConstant()); +} + +void TensorRegister::visit(const ir::operation::TransposeConv &node) +{ + // General cases + defaultRegisterTensorInfo(node.getInputs().at(ir::operation::TransposeConv::INPUT)); + defaultRegisterTensorInfo(node.getOutputs().at(0)); + + // Special case + const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto &kernel_obj = _operands.at(kernel_index); + + const auto frontend_layout = frontendLayout(); + assert(frontend_layout == ir::Layout::NCHW || frontend_layout == ir::Layout::NHWC); + const auto frontend_filter_layout = + frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW; + const auto backend_layout = backendLayout(kernel_index); + assert(backend_layout == ir::Layout::NCHW || backend_layout == ir::Layout::NHWC); + const auto backend_filter_layout = + backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWOI : kernel::FilterLayout::IOHW; + + ir::OperandInfo backend_info{ + asKernelShape(kernel_obj.shape(), frontend_filter_layout, backend_filter_layout), + kernel_obj.info().typeInfo()}; + _tensor_builder->registerTensorInfo(kernel_index, backend_info, backend_layout, + kernel_obj.isConstant()); +} + +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/TensorRegister.h b/runtime/neurun/backend/srcn/TensorRegister.h new file mode 100644 index 000000000..765f29567 --- /dev/null +++ b/runtime/neurun/backend/srcn/TensorRegister.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_SRCN_TENSOR_REGISTER_H__ +#define __NEURUN_BACKEND_SRCN_TENSOR_REGISTER_H__ + +#include <backend/ITensorRegister.h> +#include "TensorBuilder.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ + +class TensorRegister : public ITensorRegister +{ +public: + TensorRegister(const ir::Operands &operands, + const std::shared_ptr<TensorBuilder> &tensor_builder); + +public: + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::TransposeConv &) override; + +private: + const ir::Operands &operands() const override { return _operands; } + std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } + bool supportSubTensor() const final { return false; } + +private: + const ir::Operands &_operands; + const std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_TENSOR_REGISTER_H__ diff --git a/runtime/neurun/backend/srcn/kernel/AddLayer.cc b/runtime/neurun/backend/srcn/kernel/AddLayer.cc new file mode 100644 index 000000000..b53dfe89d --- /dev/null +++ b/runtime/neurun/backend/srcn/kernel/AddLayer.cc @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AddLayer.h" + +#include "OperationUtils.h" +#include "ncnn/layer/binaryop.h" + +#include "cpp14/memory.h" + +namespace +{ +std::unique_ptr<nnfw::ncnn::Mat> +convertMatIgnoreLayout(neurun::backend::srcn::kernel::TensorDescriptor &desc, void *data) +{ + if (desc.dimensions.size() == 1) + { + return nnfw::cpp14::make_unique<nnfw::ncnn::Mat>(desc.dimensions[0], data); + } + else if (desc.dimensions.size() == 2) + { + return nnfw::cpp14::make_unique<nnfw::ncnn::Mat>(desc.dimensions[1], desc.dimensions[0], data); + } + else if (desc.dimensions.size() == 3) + { + return nnfw::cpp14::make_unique<nnfw::ncnn::Mat>(desc.dimensions[2], desc.dimensions[1], + desc.dimensions[0], data); + } + else // rank == 4 and N == 1 + { + return nnfw::cpp14::make_unique<nnfw::ncnn::Mat>(desc.dimensions[3], desc.dimensions[2], + desc.dimensions[1], data); + } +} +} // namespace + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ +namespace kernel +{ + +void AddLayer::addFloat32() +{ + assert(_activation == ir::Activation::NONE); + + // ncnn kernel support + // 1. rank < 4 + // 2. 
broadcasting + // 2-1 lhs, rhs have same rank, or + // 2-2 model layout and backend layout is same + // For safety, block all broadcasting (enable when ready) + + assert(_lhsDescr.dimensions.size() < 4 || + (_lhsDescr.dimensions.size() == 4 && _lhsDescr.dimensions[0] == 1)); + assert(_rhsDescr.dimensions.size() < 4 || + (_rhsDescr.dimensions.size() == 4 && _rhsDescr.dimensions[0] == 1)); + assert((_lhsDescr.dimensions.size() == _rhsDescr.dimensions.size())); + + nnfw::ncnn::BinaryOpParam param; + param.op_type = nnfw::ncnn::BinaryOp::Operation_ADD; + + auto lhs_mat = convertMatIgnoreLayout(_lhsDescr, _lhsData.v); + auto rhs_mat = convertMatIgnoreLayout(_rhsDescr, _rhsData.v); + auto out_mat = convertMatIgnoreLayout(_outputDescr, _outputData.v); + + ::nnfw::ncnn::ncnn_binary_op(param, *lhs_mat.get(), *rhs_mat.get(), *out_mat.get()); +} + +void AddLayer::addQuant8() +{ + // quant8 add is not implemented yet + throw std::runtime_error{"NYI"}; +} + +void AddLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr, + const ir::Layout backendLayout) +{ + _lhsData.u8 = lhsData; + _lhsDescr = lhsDescr; + _rhsData.u8 = rhsData; + _rhsDescr = rhsDescr; + _inputType = lhsDescr.type; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; + _backendLayout = backendLayout; +} + +void AddLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + addFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + addQuant8(); + } +} + +} // namespace kernel +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/kernel/AddLayer.h b/runtime/neurun/backend/srcn/kernel/AddLayer.h new file mode 100644 index 000000000..1cae171b5 --- /dev/null +++ b/runtime/neurun/backend/srcn/kernel/AddLayer.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 
2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_BACKEND_SRCN_KERNEL_ADD_LAYER_H__
#define __NEURUN_BACKEND_SRCN_KERNEL_ADD_LAYER_H__

#include <exec/IFunction.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace srcn
{
namespace kernel
{

// Element-wise Add kernel for the srcn backend (float32 only; quant8 throws NYI)
class AddLayer : public ::neurun::exec::IFunction
{
public:
  AddLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
  {
    // DO NOTHING
  }

public:
  // Element-wise float32 add of lhs and rhs into the output buffer
  void addFloat32();

  // Quantized add; currently throws std::runtime_error{"NYI"}
  void addQuant8();

  // Stores buffers/descriptors and parameters; no computation happens here
  void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
                 const TensorDescriptor &rhsDescr, const ir::Activation activation,
                 uint8_t *outputData, const TensorDescriptor &outputDescr,
                 const ir::Layout backendLayout);

  void run();
  void runSync()
  {
    // this abstract method is used just for profiling and called for
    // backend::acl_common::AclFunction
    run();
  }

private:
  DataPtr _lhsData;
  DataPtr _rhsData;
  DataPtr _outputData;

  TensorDescriptor _lhsDescr;
  TensorDescriptor _rhsDescr;
  TensorDescriptor _outputDescr;

  // Fused activation requested by the model (default NONE)
  ir::Activation _activation{ir::Activation::NONE};

  OperandType _inputType{OperandType::FLOAT32};

  ir::Layout _backendLayout{ir::Layout::UNKNOWN};
};

} // namespace kernel
} // namespace srcn
} // namespace backend
} // namespace neurun

#endif //
__NEURUN_BACKEND_SRCN_KERNEL_ADD_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc
new file mode 100644
index 000000000..4e70f6319
--- /dev/null
+++ b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc
@@ -0,0 +1,233 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ConvolutionLayer.h"

#include "OperationUtils.h"
#include <ncnn/srcn/srcn_conv.h>

namespace neurun
{
namespace backend
{
namespace srcn
{
namespace kernel
{

// Members get safe defaults here; buffers and parameters are wired up in configure()
ConvolutionLayer::ConvolutionLayer()
    : _inputData(), _kernelData(), _biasData(), _outputData(), _inputDescr(), _kernelDescr(),
      _biasDescr(), _outputDescr(), _paddingType(0), _paddingLeft(0), _paddingTop(0),
      _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
      _inputType(OperandType::FLOAT32), _layout(nnfw::srcn::col_major), _winograd_weights(nullptr),
      _sparse_weights(nullptr)
{
  // DO NOTHING
}

// Releases the cached winograd/sparse weight transforms owned by this layer
ConvolutionLayer::~ConvolutionLayer()
{
  // TODO Move managing constant _winograd_data and sparse
  nnfw::srcn::winograd_release(_winograd_weights);
  // depth axis differs per image layout (col_major = NHWC -> 3, row_major = NCHW -> 1)
  size_t depth_index = _layout == nnfw::srcn::col_major ?
3 : 1; + nnfw::srcn::sparse_release(_outputDescr.dimensions[depth_index], _sparse_weights); +} + +void ConvolutionLayer::convFloat32() +{ + nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat; + nnfw::srcn::convParams_t in_param; + + assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major); + size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2; + size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3; + size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1; + size_t kernel_input_depth_index = _layout == nnfw::srcn::col_major ? 3 : 1; + size_t kernel_output_depth_index = 0; + + const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0); + const int input_height = _inputDescr.dimensions[height_index]; + const int input_width = _inputDescr.dimensions[width_index]; + const int input_depth = + MatchingDim(_inputDescr, depth_index, _kernelDescr, kernel_input_depth_index); + in_mat.c = input_depth; + in_mat.w = input_width; + in_mat.h = input_height; + in_mat.n = batches; + in_mat.data = _inputData.f; + + const int output_height = _outputDescr.dimensions[height_index]; + const int output_width = _outputDescr.dimensions[width_index]; + const int output_depth = + MatchingDim(_kernelDescr, kernel_output_depth_index, _outputDescr, depth_index); + out_mat.c = output_depth; + out_mat.w = output_width; + out_mat.h = output_height; + out_mat.n = batches; + out_mat.data = _outputData.f; + + const int outch = _kernelDescr.dimensions[kernel_output_depth_index]; + const int inch = _kernelDescr.dimensions[kernel_input_depth_index]; + const int ker_height = _kernelDescr.dimensions[height_index]; + const int ker_width = _kernelDescr.dimensions[width_index]; + kernel_mat.c = input_depth; + kernel_mat.w = ker_width; + kernel_mat.h = ker_height; + kernel_mat.n = output_depth; + kernel_mat.data = _kernelData.f; + + in_param.kernel_w = ker_width; + in_param.kernel_h = ker_height; + in_param.stride_w = _strideWidth; + in_param.stride_h = 
_strideHeight; + in_param.padding = _paddingType; + in_param.pad_w = _paddingLeft; + in_param.pad_h = _paddingTop; + in_param.dilation_w = 1; + in_param.dilation_h = 1; + + nnfw::srcn::winogradParams_t winograd_param; + winograd_param.kernel_w = ker_width; + winograd_param.kernel_h = ker_height; + winograd_param.stride_w = _strideWidth; + winograd_param.stride_h = _strideHeight; + winograd_param.dilation_w = 1; + winograd_param.dilation_h = 1; + winograd_param.batch = batches; + winograd_param.w = ker_width; + winograd_param.h = ker_height; + winograd_param.inch = inch; + winograd_param.outch = outch; + winograd_param.num_threads = 4; + + winograd_param.conv_type = _layout; + winograd_param.weight_data = _kernelData.f; + + // Without winograd + if (nnfw::srcn::check_winograd(winograd_param)) + { + _winograd_weights = nnfw::srcn::trans_weight2winograd(winograd_param, nullptr); + } + _sparse_weights = nnfw::srcn::trans_weight2sparse(kernel_mat); + + nnfw::srcn::srcn_convolution2D(in_mat, kernel_mat, out_mat, in_param, _winograd_weights, 4, + _layout); + + // Add biases + if (_biasData.f == nullptr) + { + return; + } + // TODO Optimize + uint32_t strides[4] = { + _outputDescr.dimensions[1] * _outputDescr.dimensions[2] * _outputDescr.dimensions[3], + _outputDescr.dimensions[2] * _outputDescr.dimensions[3], _outputDescr.dimensions[3], 1}; + if (_layout == nnfw::srcn::convType_t::col_major) + { + for (uint32_t c = 0; c < _outputDescr.dimensions[3]; ++c) + { + if (_biasData.f[c] != 0) + { + for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b) + { + for (uint32_t h = 0; h < _outputDescr.dimensions[1]; ++h) + { + for (uint32_t w = 0; w < _outputDescr.dimensions[2]; ++w) + { + _outputData.f[b * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]] += + _biasData.f[c]; + } + } + } + } + } + } + else if (_layout == nnfw::srcn::convType_t::row_major) + { + for (uint32_t c = 0; c < _outputDescr.dimensions[1]; ++c) + { + if (_biasData.f[c] != 0) + { + for (uint32_t 
b = 0; b < _outputDescr.dimensions[0]; ++b) + { + for (uint32_t h = 0; h < _outputDescr.dimensions[2]; ++h) + { + for (uint32_t w = 0; w < _outputDescr.dimensions[3]; ++w) + { + _outputData.f[b * strides[0] + c * strides[1] + h * strides[2] + w * strides[3]] += + _biasData.f[c]; + } + } + } + } + } + } + else + { + throw std::runtime_error("Wrong Layout"); + } +} + +void ConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + uint8_t *kernelData, const TensorDescriptor kernelDescr, + uint8_t *biasData, const TensorDescriptor biasDescr, + const uint32_t paddingType, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, uint8_t *outputData, + const TensorDescriptor outputDescr, ir::Layout layout) +{ + assert(inputDescr.dimensions.size() == 4); + assert(kernelDescr.dimensions.size() == 4); + assert(biasDescr.dimensions.size() == 1); + assert(outputDescr.dimensions.size() == 4); + assert(inputDescr.type == kernelDescr.type && inputDescr.type == outputDescr.type); + // TODO Add assertions validating height and width with padding + _layout = convertLayout(layout); + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _kernelData.u8 = kernelData; + _kernelDescr = kernelDescr; + _biasData.u8 = biasData; + _biasDescr = biasDescr; + _paddingType = paddingType; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void ConvolutionLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + convFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + throw std::runtime_error("NYI"); + } +} + +} // namespace kernel +} // namespace srcn +} // namespace backend +} // namespace 
neurun
diff --git a/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.h b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.h
new file mode 100644
index 000000000..4edafaa87
--- /dev/null
+++ b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.h
@@ -0,0 +1,89 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_BACKEND_SRCN_KERNEL_CONVOLUTION_LAYER_H__
#define __NEURUN_BACKEND_SRCN_KERNEL_CONVOLUTION_LAYER_H__

#include <exec/IFunction.h>
#include <ncnn/srcn/conv_type.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace srcn
{
namespace kernel
{

// 2D convolution kernel for the srcn backend (float32 only; quant8 throws NYI).
// Owns cached winograd/sparse weight transforms released in the destructor.
class ConvolutionLayer : public ::neurun::exec::IFunction
{
public:
  ConvolutionLayer();
  ~ConvolutionLayer();

public:
  // Float32 convolution followed by a manual bias add
  void convFloat32();
  // Stores buffers, descriptors and convolution parameters; no computation here
  void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
                 const TensorDescriptor kernelDescr, uint8_t *biasData,
                 const TensorDescriptor biasDescr, const uint32_t paddingType,
                 const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
                 const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
                 uint8_t *outputData, const TensorDescriptor outputDescr, ir::Layout layout);

  void run();
  void runSync()
  {
    // this abstract method is used just for profiling and called for
    // backend::acl_common::AclFunction
    run();
  }

private:
  DataPtr _inputData;
  DataPtr _kernelData;
  DataPtr _biasData;
  DataPtr _outputData;

  TensorDescriptor _inputDescr;
  TensorDescriptor _kernelDescr;
  TensorDescriptor _biasDescr;
  TensorDescriptor _outputDescr;

  uint32_t _paddingType;
  uint32_t _paddingLeft;
  uint32_t _paddingTop;
  uint32_t _paddingRight;
  uint32_t _paddingBottom;

  uint32_t _strideWidth;
  uint32_t _strideHeight;

  OperandType _inputType;
  // srcn image layout (col_major = NHWC, row_major = NCHW)
  nnfw::srcn::convType_t _layout;

  // Cached weight transforms owned by this layer (released in the destructor)
  float *_winograd_weights;
  void *_sparse_weights;
};

} // namespace kernel
} // namespace srcn
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_SRCN_KERNEL_CONVOLUTION_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.cc b/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.cc
new file mode 100644
index 000000000..a1718c500
--- /dev/null
+++ b/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.cc
@@ -0,0 +1,212 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "DepthwiseConvolutionLayer.h"

#include <ncnn/srcn/srcn_conv.h>
#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace srcn
{
namespace kernel
{

// Members get safe defaults here; buffers and parameters are wired up in configure()
DepthwiseConvolutionLayer::DepthwiseConvolutionLayer()
    : _inputData(), _kernelData(), _biasData(), _outputData(), _inputDescr(), _kernelDescr(),
      _biasDescr(), _outputDescr(), _paddingType(0), _paddingLeft(0), _paddingTop(0),
      _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
      _inputType(OperandType::FLOAT32), _layout(nnfw::srcn::col_major)
{
  // DO NOTHING
}

// Runs a float32 depthwise convolution with srcn, then adds the bias manually.
// NOTE(review): bias_mat is already passed to srcn_depthwise_conv below AND the
// bias is added again by hand afterwards -- confirm against srcn_depthwise_conv
// whether the bias ends up applied twice.
void DepthwiseConvolutionLayer::convFloat32()
{
  nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat, bias_mat;
  nnfw::srcn::convParams_t in_param;

  // Dimension indices depend on image layout: col_major = NHWC, row_major = NCHW
  assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major);
  size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2;
  size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3;
  size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;

  const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0);
  const int input_height = _inputDescr.dimensions[height_index];
  const int input_width = _inputDescr.dimensions[width_index];
  const int input_depth = _inputDescr.dimensions[depth_index];
  in_mat.c = input_depth;
  in_mat.w = input_width;
  in_mat.h = input_height;
  in_mat.n = batches;
  in_mat.data = _inputData.f;

  const int output_height = _outputDescr.dimensions[height_index];
  const int output_width = _outputDescr.dimensions[width_index];
  const int output_depth = MatchingDim(_kernelDescr, depth_index, _outputDescr, depth_index);
  out_mat.c = output_depth;
  out_mat.w = output_width;
  out_mat.h = output_height;
  out_mat.n = batches;
  out_mat.data = _outputData.f;

  const int ker_height = _kernelDescr.dimensions[height_index];
  const int ker_width = _kernelDescr.dimensions[width_index];
  kernel_mat.c = MatchingDim(_kernelDescr, depth_index, _outputDescr, depth_index);
  kernel_mat.w = ker_width;
  kernel_mat.h = ker_height;
  kernel_mat.n = 1;
  kernel_mat.data = _kernelData.f;

  const int bias_depth = MatchingDim(_biasDescr, 0, _outputDescr, depth_index);
  bias_mat.c = bias_depth;
  bias_mat.data = _biasData.f;

  in_param.kernel_w = ker_width;
  in_param.kernel_h = ker_height;
  in_param.stride_w = _strideWidth;
  in_param.stride_h = _strideHeight;
  in_param.padding = _paddingType;
  in_param.pad_w = _paddingLeft;
  in_param.pad_h = _paddingTop;
  in_param.dilation_w = 1;
  in_param.dilation_h = 1;

  nnfw::srcn::srcn_depthwise_conv(in_mat, kernel_mat, out_mat, bias_mat, in_param, 4, _layout);

  // Add biases (skipped entirely when the model has no bias)
  if (_biasData.f == nullptr)
  {
    return;
  }
  // TODO Optimize
  uint32_t strides[4] = {
      _outputDescr.dimensions[1] * _outputDescr.dimensions[2] * _outputDescr.dimensions[3],
      _outputDescr.dimensions[2] * _outputDescr.dimensions[3], _outputDescr.dimensions[3], 1};
  if (_layout == nnfw::srcn::convType_t::col_major)
  {
    // NHWC: channel is the innermost axis (index 3)
    for (uint32_t c = 0; c < _outputDescr.dimensions[3]; ++c)
    {
      if (_biasData.f[c] != 0)
      {
        for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b)
        {
          for (uint32_t h = 0; h < _outputDescr.dimensions[1]; ++h)
          {
            for (uint32_t w = 0; w < _outputDescr.dimensions[2]; ++w)
            {
              _outputData.f[b * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]] +=
                  _biasData.f[c];
            }
          }
        }
      }
    }
  }
  else if (_layout == nnfw::srcn::convType_t::row_major)
  {
    // NCHW: channel is axis 1
    for (uint32_t c = 0; c < _outputDescr.dimensions[1]; ++c)
    {
      if (_biasData.f[c] != 0)
      {
        for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b)
        {
          for (uint32_t h = 0; h < _outputDescr.dimensions[2]; ++h)
          {
            for (uint32_t w = 0; w < _outputDescr.dimensions[3]; ++w)
            {
              _outputData.f[b * strides[0] + c * strides[1] + h * strides[2] + w * strides[3]] +=
                  _biasData.f[c];
            }
          }
        }
      }
    }
  }
  else
  {
    throw std::runtime_error("Wrong Layout");
  }
}

// Stores buffers, descriptors and parameters; rejects the unsupported
// NHWC layout and any kernel other than 3x3. No computation happens here.
void DepthwiseConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
                                          uint8_t *kernelData, const TensorDescriptor kernelDescr,
                                          uint8_t *biasData, const TensorDescriptor biasDescr,
                                          const uint32_t paddingType, const uint32_t paddingLeft,
                                          const uint32_t paddingRight, const uint32_t paddingTop,
                                          const uint32_t paddingBottom, const uint32_t strideWidth,
                                          const uint32_t strideHeight, uint8_t *outputData,
                                          const TensorDescriptor outputDescr, ir::Layout layout)
{
  assert(layout == ir::Layout::NHWC || layout == ir::Layout::NCHW);

  const auto height_index = layout == ir::Layout::NHWC ? 1 : 2;
  const auto width_index = layout == ir::Layout::NHWC ? 2 : 3;

  if (layout == ir::Layout::NHWC)
  {
    throw std::runtime_error("DepthwiseConv of ncnn does not support layout yet in NHWC");
  }

  if (kernelDescr.dimensions[height_index] != 3 || kernelDescr.dimensions[width_index] != 3)
  {
    throw std::runtime_error("DepthwiseConv of ncnn supports only 3x3 kernel now");
  }

  assert(inputDescr.dimensions.size() == 4);
  assert(kernelDescr.dimensions.size() == 4);
  assert(biasDescr.dimensions.size() == 1);
  assert(outputDescr.dimensions.size() == 4);
  assert(inputDescr.type == kernelDescr.type && inputDescr.type == outputDescr.type);
  // TODO Add assertions validating height and width with padding
  _layout = convertLayout(layout);
  _inputData.u8 = inputData;
  _inputDescr = inputDescr;
  _inputType = inputDescr.type;
  _kernelData.u8 = kernelData;
  _kernelDescr = kernelDescr;
  _biasData.u8 = biasData;
  _biasDescr = biasDescr;
  _paddingType = paddingType;
  _paddingLeft = paddingLeft;
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _outputData.u8 = outputData;
  _outputDescr = outputDescr;
}

// Dispatches on the operand type; quantized path is not implemented
void DepthwiseConvolutionLayer::run()
{
  if (_inputType == OperandType::FLOAT32)
  {
    convFloat32();
  }
  else if (_inputType == OperandType::QUANT8_ASYMM)
  {
    throw std::runtime_error("NYI");
  }
}

} // namespace kernel
} // namespace srcn
} // namespace backend
} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.h b/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..e94acff08
--- /dev/null
+++ b/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.h
@@ -0,0 +1,85 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_BACKEND_SRCN_KERNEL_DEPTHWISE_CONVOLUTION_LAYER_H__
#define __NEURUN_BACKEND_SRCN_KERNEL_DEPTHWISE_CONVOLUTION_LAYER_H__

#include <exec/IFunction.h>
#include <ncnn/srcn/conv_type.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace srcn
{
namespace kernel
{

// Depthwise 2D convolution kernel for the srcn backend.
// configure() currently accepts only NCHW layout and a 3x3 kernel (throws otherwise).
class DepthwiseConvolutionLayer : public ::neurun::exec::IFunction
{
public:
  DepthwiseConvolutionLayer();

public:
  // Float32 depthwise convolution followed by a manual bias add
  void convFloat32();
  // Stores buffers, descriptors and parameters; no computation here
  void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
                 const TensorDescriptor kernelDescr, uint8_t *biasData,
                 const TensorDescriptor biasDescr, const uint32_t paddingType,
                 const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
                 const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
                 uint8_t *outputData, const TensorDescriptor outputDescr, ir::Layout layout);

  void run();
  void runSync()
  {
    // this abstract method is used just for profiling and called for
    // backend::acl_common::AclFunction
    run();
  }

private:
  DataPtr _inputData;
  DataPtr _kernelData;
  DataPtr _biasData;
  DataPtr _outputData;

  TensorDescriptor _inputDescr;
  TensorDescriptor _kernelDescr;
  TensorDescriptor _biasDescr;
  TensorDescriptor _outputDescr;

  uint32_t _paddingType;
  uint32_t _paddingLeft;
  uint32_t _paddingTop;
  uint32_t _paddingRight;
  uint32_t _paddingBottom;

  uint32_t _strideWidth;
  uint32_t _strideHeight;

  OperandType _inputType;
  // srcn image layout (col_major = NHWC, row_major = NCHW)
  nnfw::srcn::convType_t _layout;
};

} // namespace kernel
} // namespace srcn
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_SRCN_KERNEL_DEPTHWISE_CONVOLUTION_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.cc b/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.cc
new file mode 100644
index 000000000..c83fe6d67
--- /dev/null
+++ b/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.cc
@@ -0,0 +1,155 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#include "InstanceNormLayer.h" + +#include "OperationUtils.h" +#include "ncnn/layer/instance_norm.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ +namespace kernel +{ + +InstanceNormLayer::InstanceNormLayer() + : _inputData(), _gammaData(), _betaData(), _outputData(), _inputDescr(), _gammaDescr(), + _betaDescr(), _outputDescr(), _epsilon(1e-5), _activation(ir::Activation::NONE), + _inputType(OperandType::FLOAT32), _backendLayout(ir::Layout::UNKNOWN) +{ + // DO NOTHING +} + +void InstanceNormLayer::instanceNormFloat32() +{ + // Call kernel for NCHW data layout + if (_backendLayout == ir::Layout::NCHW) + { + // Supports single batch only + assert(_inputDescr.dimensions[0] == 1); + const int input_channels = _inputDescr.dimensions[1]; + const int input_height = _inputDescr.dimensions[2]; + const int input_width = _inputDescr.dimensions[3]; + nnfw::ncnn::Mat in_mat(input_width, input_height, input_channels, _inputData.f); + + const int gamma_channels = _gammaDescr.dimensions[0]; + nnfw::ncnn::Mat gamma_mat(gamma_channels, _gammaData.f); + + const int beta_channels = _betaDescr.dimensions[0]; + nnfw::ncnn::Mat beta_mat(beta_channels, _betaData.f); + + assert(_outputDescr.dimensions[0] == 1); + const int output_channels = _outputDescr.dimensions[1]; + const int output_height = _outputDescr.dimensions[2]; + const int output_width = _outputDescr.dimensions[3]; + nnfw::ncnn::Mat out_mat(output_width, output_height, output_channels, _outputData.f); + + if (_activation == ir::Activation::NONE) + { + nnfw::ncnn::ncnn_instance_norm_rowmajor(in_mat, out_mat, gamma_mat, beta_mat, input_channels, + _epsilon); + } + else if (_activation == ir::Activation::RELU) + { + nnfw::ncnn::ncnn_instance_norm_with_relu_rowmajor(in_mat, out_mat, gamma_mat, beta_mat, + input_channels, _epsilon, 0.f); + } + else + { + std::runtime_error("Unsupported activation type"); + } + } + // Call kernel for NHWC data layout + else if (_backendLayout == ir::Layout::NHWC) + { + // 
Supports single batch only + assert(_inputDescr.dimensions[0] == 1); + const int input_height = _inputDescr.dimensions[1]; + const int input_width = _inputDescr.dimensions[2]; + const int input_channels = _inputDescr.dimensions[3]; + nnfw::ncnn::Mat in_mat(input_channels, input_width, input_height, _inputData.f); + + const int gamma_channels = _gammaDescr.dimensions[0]; + nnfw::ncnn::Mat gamma_mat(gamma_channels, _gammaData.f); + + const int beta_channels = _betaDescr.dimensions[0]; + nnfw::ncnn::Mat beta_mat(beta_channels, _betaData.f); + + assert(_outputDescr.dimensions[0] == 1); + const int output_height = _outputDescr.dimensions[1]; + const int output_width = _outputDescr.dimensions[2]; + const int output_channels = _outputDescr.dimensions[3]; + nnfw::ncnn::Mat out_mat(output_channels, output_width, output_height, _outputData.f); + + if (_activation == ir::Activation::NONE) + { + nnfw::ncnn::ncnn_instance_norm_colmajor(in_mat, out_mat, gamma_mat, beta_mat, input_channels, + _epsilon); + } + else if (_activation == ir::Activation::RELU) + { + nnfw::ncnn::ncnn_instance_norm_with_relu_colmajor(in_mat, out_mat, gamma_mat, beta_mat, + input_channels, _epsilon, 0.f); + } + { + std::runtime_error("Unsupported activation type"); + } + } + else + { + std::runtime_error("Unsupported backend layout"); + } +} + +void InstanceNormLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + uint8_t *gammaData, const TensorDescriptor gammaDescr, + uint8_t *betaData, const TensorDescriptor betaDescr, + uint8_t *outputData, const TensorDescriptor outputDescr, + float epsilon, ir::Activation activation, + ir::Layout backendLayout) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _gammaData.u8 = gammaData; + _gammaDescr = gammaDescr; + _betaData.u8 = betaData; + _betaDescr = betaDescr; + _outputData.u8 = outputData; + _outputDescr = outputDescr; + _epsilon = epsilon; + _activation = activation; + _backendLayout = backendLayout; +} + +void 
InstanceNormLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + instanceNormFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + throw std::runtime_error("NYI"); + } +} + +} // namespace kernel +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.h b/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.h new file mode 100644 index 000000000..0ac0cef3f --- /dev/null +++ b/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef __NEURUN_BACKEND_SRCN_KERNEL_INSTANCENORM_LAYER_H__
#define __NEURUN_BACKEND_SRCN_KERNEL_INSTANCENORM_LAYER_H__

#include <exec/IFunction.h>

#include "OperationUtils.h"

namespace neurun
{
namespace backend
{
namespace srcn
{
namespace kernel
{

// Instance normalization kernel for the srcn backend
// (single-batch 4D float32 tensors; NONE/RELU fused activation)
class InstanceNormLayer : public ::neurun::exec::IFunction
{
public:
  InstanceNormLayer();

public:
  // Float32 instance norm, dispatching on backend layout and activation
  void instanceNormFloat32();
  // Stores buffers, descriptors and parameters; no computation here
  void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *gammaData,
                 const TensorDescriptor gammaDescr, uint8_t *betaData,
                 const TensorDescriptor betaDescr, uint8_t *outputData,
                 const TensorDescriptor outputDescr, float epsilon, ir::Activation activation,
                 ir::Layout backendLayout);

  void run();
  void runSync()
  {
    // this abstract method is used just for profiling and called for
    // backend::acl_common::AclFunction
    run();
  }

private:
  DataPtr _inputData;
  DataPtr _gammaData;
  DataPtr _betaData;
  DataPtr _outputData;

  TensorDescriptor _inputDescr;
  TensorDescriptor _gammaDescr;
  TensorDescriptor _betaDescr;
  TensorDescriptor _outputDescr;

  // Numerical-stability term added to the variance (default 1e-5)
  float _epsilon;
  ir::Activation _activation;

  OperandType _inputType;
  ir::Layout _backendLayout;
};

} // namespace kernel
} // namespace srcn
} // namespace backend
} // namespace neurun

#endif // __NEURUN_BACKEND_SRCN_KERNEL_INSTANCENORM_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/OperationUtils.cc b/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
new file mode 100644
index 000000000..684573a51
--- /dev/null
+++ b/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
@@ -0,0 +1,139 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "OperationUtils.h"

#include <cmath>
#include <algorithm>
#include <cassert>
#include <map>

#include "util/Utils.h"

namespace neurun
{
namespace backend
{
namespace srcn
{
namespace kernel
{

// Asserts (debug builds only) that both descriptors agree on the given
// dimension, and returns it
uint32_t MatchingDim(const TensorDescriptor &descr1, int index1, const TensorDescriptor &descr2,
                     int index2)
{
  UNUSED_RELEASE(descr2);
  UNUSED_RELEASE(index2);
  assert(descr1.dimensions[index1] == descr2.dimensions[index2]);
  return descr1.dimensions[index1];
}

// Returns the 4-element axis permutation mapping from_layout -> to_layout for
// filter tensors, or an empty vector when the pair is not in the table
std::vector<int32_t> getFilterPermutation(FilterLayout from_layout, FilterLayout to_layout)
{
  static std::map<std::pair<FilterLayout, FilterLayout>, std::vector<int32_t>> filter_permutation =
      {{std::make_pair(FilterLayout::OHWI, FilterLayout::HWOI), {1, 2, 0, 3}},
       {std::make_pair(FilterLayout::OHWI, FilterLayout::HWIO), {1, 2, 3, 0}},
       {std::make_pair(FilterLayout::OHWI, FilterLayout::OIHW), {0, 3, 1, 2}},
       {std::make_pair(FilterLayout::OHWI, FilterLayout::IOHW), {3, 0, 1, 2}},
       {std::make_pair(FilterLayout::OIHW, FilterLayout::HWOI), {2, 3, 0, 1}},
       {std::make_pair(FilterLayout::OIHW, FilterLayout::HWIO), {2, 3, 1, 0}},
       {std::make_pair(FilterLayout::OIHW, FilterLayout::OHWI), {0, 2, 3, 1}},
       {std::make_pair(FilterLayout::OIHW, FilterLayout::IOHW), {1, 0, 2, 3}}};

  const auto pair = std::make_pair(from_layout, to_layout);
  const auto it = filter_permutation.find(pair);
  if (it == filter_permutation.end())
  {
    return std::vector<int32_t>{};
  }
  return it->second;
}

// Permutes 4D filter coordinates from one layout to another; throws
// std::runtime_error when the layout pair is not supported
Coordinates convertCoordinates(const Coordinates &coordinates, FilterLayout from_layout,
                               FilterLayout to_layout)
{
  assert(coordinates.size() == 4);
  if (from_layout == to_layout)
  {
    return coordinates;
  }

  const auto permutation = getFilterPermutation(from_layout, to_layout);
  if (permutation.size() == 0)
  {
    throw std::runtime_error("Not supported FilterLayout");
  }
  return Coordinates{coordinates[permutation[0]], coordinates[permutation[1]],
                     coordinates[permutation[2]], coordinates[permutation[3]]};
}

// Maps IR image layout to the srcn convolution type:
// NHWC -> col_major, NCHW -> row_major; anything else throws
nnfw::srcn::convType_t convertLayout(ir::Layout layout)
{
  if (layout == ir::Layout::NHWC)
  {
    return nnfw::srcn::col_major;
  }
  else if (layout == ir::Layout::NCHW)
  {
    return nnfw::srcn::row_major;
  }
  else
  {
    throw std::runtime_error("Not supported layout");
  }
}

// Builds a backend TensorDescriptor from an IR operand, permuting rank-4 dims
// when the frontend and backend image layouts differ (NHWC <-> NCHW)
TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout,
                                     ir::Layout backend_layout)
{
  TensorDescriptor descriptor;

  auto dims = o.shape().dims();
  if (frontend_layout == ir::Layout::NHWC && backend_layout == ir::Layout::NCHW &&
      o.shape().rank() == 4)
  {
    // NHWC -> NCHW
    uint32_t permutation[4] = {0, 3, 1, 2};
    for (int i = 0; i < o.shape().rank(); ++i)
    {
      dims.at(i) = o.shape().dim(permutation[i]);
    }
  }
  else if (frontend_layout == ir::Layout::NCHW && backend_layout == ir::Layout::NHWC &&
           o.shape().rank() == 4)
  {
    // NCHW -> NHWC
    uint32_t permutation[4] = {0, 2, 3, 1};
    for (int i = 0; i < o.shape().rank(); ++i)
    {
      dims.at(i) = o.shape().dim(permutation[i]);
    }
  }
  descriptor.dimensions = std::vector<uint32_t>(dims.begin(), dims.end());
  descriptor.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type()));
  descriptor.scale = o.typeInfo().scale();
  descriptor.offset = o.typeInfo().offset();

  // CPU backend assume that neurun internal descriptor's rank is always same or less than 4
  assert(descriptor.dimensions.size() <= 4);

  return descriptor;
}

} // namespace kernel
} // namespace srcn
} // namespace backend
} // namespace
neurun diff --git a/runtime/neurun/backend/srcn/kernel/OperationUtils.h b/runtime/neurun/backend/srcn/kernel/OperationUtils.h new file mode 100644 index 000000000..aa163a1f3 --- /dev/null +++ b/runtime/neurun/backend/srcn/kernel/OperationUtils.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_SRCN_OPERATION_UTILS_H__ +#define __NEURUN_BACKEND_SRCN_OPERATION_UTILS_H__ + +#include <iostream> +#include <limits> +#include <vector> + +#include "ir/Operand.h" +#include "ir/DataType.h" +#include <ir/InternalType.h> +#include <ncnn/srcn/conv_type.h> + +using OperandType = neurun::ir::DataType; +using neurun::util::Coordinates; + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ +namespace kernel +{ + +struct TensorDescriptor +{ + OperandType type; + std::vector<uint32_t> dimensions; + float scale; + int32_t offset; +}; + +union DataPtr { + uint8_t *u8; + int8_t *i8; + int32_t *i32; + float *f; + void *v; +}; + +enum FilterLayout +{ + OHWI = 0, // TfLite Kernel Layout when using NHWC image layout + HWOI, // SRCN Transpose Conv Kernel Layout when using NHWC image layout + OIHW, // SRCN Conv Kernel Layout when using NCHW image layout + HWIO, // SRCN Conv Kernel Layout when using NHWC image layout + IOHW, // SRCN Transpose Conv Kernel Layout when using NCHW image layout +}; + +uint32_t MatchingDim(const TensorDescriptor &shape1, int 
index1, const TensorDescriptor &shape2, + int index2); + +std::vector<int32_t> getFilterPermutation(FilterLayout from_layout, FilterLayout to_layout); + +Coordinates convertCoordinates(const Coordinates &from_coordinates, FilterLayout from_layout, + FilterLayout to_layout); + +nnfw::srcn::convType_t convertLayout(ir::Layout layout); + +TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout, + ir::Layout backend_layout); + +} // namespace kernel +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_OPERATION_UTILS_H__ diff --git a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc new file mode 100644 index 000000000..26469f728 --- /dev/null +++ b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TransposeConvLayer.h" + +#include <cstring> +#include "OperationUtils.h" +#include "ncnn/srcn/srcn_conv.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ +namespace kernel +{ + +TransposeConvLayer::TransposeConvLayer() + : _inputData(), _kernelData(), _outputData(), _inputDescr(), _kernelDescr(), _outputDescr(), + _paddingType(0), _paddingLeft(0), _paddingTop(0), _paddingRight(0), _paddingBottom(0), + _strideWidth(0), _strideHeight(0), _inputType(OperandType::FLOAT32), + _layout(nnfw::srcn::col_major) +{ + // DO NOTHING +} + +void TransposeConvLayer::convFloat32() +{ + nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat; + nnfw::srcn::convParams_t in_param; + + assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major); + size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2; + size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3; + size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1; + size_t kernel_input_depth_index = _layout == nnfw::srcn::col_major ? 
3 : 1; + size_t kernel_output_depth_index = 0; + const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0); + const int input_height = _inputDescr.dimensions[height_index]; + const int input_width = _inputDescr.dimensions[width_index]; + const int input_depth = + MatchingDim(_inputDescr, depth_index, _kernelDescr, kernel_input_depth_index); + in_mat.c = input_depth; + in_mat.w = input_width; + in_mat.h = input_height; + in_mat.n = batches; + in_mat.data = _inputData.f; + + const int output_height = _outputDescr.dimensions[height_index]; + const int output_width = _outputDescr.dimensions[width_index]; + const int output_depth = + MatchingDim(_kernelDescr, kernel_output_depth_index, _outputDescr, depth_index); + out_mat.c = output_depth; + out_mat.w = output_width; + out_mat.h = output_height; + out_mat.n = batches; + out_mat.data = _outputData.f; + + const int ker_height = _kernelDescr.dimensions[height_index]; + const int ker_width = _kernelDescr.dimensions[width_index]; + kernel_mat.c = input_depth; + kernel_mat.w = ker_width; + kernel_mat.h = ker_height; + kernel_mat.n = output_depth; + kernel_mat.data = _kernelData.f; + + in_param.kernel_w = ker_width; + in_param.kernel_h = ker_height; + in_param.stride_w = _strideWidth; + in_param.stride_h = _strideHeight; + in_param.padding = _paddingType; + in_param.pad_w = _paddingLeft; + in_param.pad_h = _paddingTop; + in_param.dilation_w = 1; + in_param.dilation_h = 1; + + memset(_outputData.f, 0, out_mat.n * out_mat.h * out_mat.w * out_mat.c * sizeof(float)); + + nnfw::srcn::srcn_deconvolution2D(in_mat, kernel_mat, out_mat, in_param, 4, _layout); +} + +void TransposeConvLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + uint8_t *kernelData, const TensorDescriptor kernelDescr, + const uint32_t paddingType, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, uint8_t 
*outputData, + const TensorDescriptor outputDescr, ir::Layout layout) +{ + _layout = convertLayout(layout); + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _kernelData.u8 = kernelData; + _kernelDescr = kernelDescr; + _paddingType = paddingType; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void TransposeConvLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + convFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + throw std::runtime_error("NYI"); + } +} + +} // namespace kernel +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h new file mode 100644 index 000000000..cd88d4127 --- /dev/null +++ b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_SRCN_KERNEL_TRANSPOSECONV_LAYER_H__ +#define __NEURUN_BACKEND_SRCN_KERNEL_TRANSPOSECONV_LAYER_H__ + +#include <exec/IFunction.h> +#include <ncnn/srcn/conv_type.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ +namespace kernel +{ + +class TransposeConvLayer : public ::neurun::exec::IFunction +{ +public: + TransposeConvLayer(); + +public: + void convFloat32(); + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData, + const TensorDescriptor kernelDescr, const uint32_t paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, + uint8_t *outputData, const TensorDescriptor outputDescr, + ir::Layout backend_layout); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _kernelData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _kernelDescr; + TensorDescriptor _outputDescr; + + uint32_t _paddingType; + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + + OperandType _inputType; + nnfw::srcn::convType_t _layout; +}; + +} // namespace kernel +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_KERNEL_TRANSPOSECONV_LAYER_H__ diff --git a/runtime/neurun/backend/srcn/operand/Tensor.cc b/runtime/neurun/backend/srcn/operand/Tensor.cc new file mode 100644 index 000000000..8a53f97c5 --- /dev/null +++ b/runtime/neurun/backend/srcn/operand/Tensor.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Tensor.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ +namespace operand +{ + +size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + size_t rank = num_dimensions(); + size_t offset = 0; + for (size_t i = 0; i < rank; ++i) + { + offset = offset * dimension(i) + coords[i]; + } + offset *= sizeOfDataType(data_type()); + return offset; +} + +void Tensor::access(const std::function<void(ITensor &)> &fn) { fn(*this); } + +} // namespace operand +} // namespace srcn +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/srcn/operand/Tensor.h b/runtime/neurun/backend/srcn/operand/Tensor.h new file mode 100644 index 000000000..e16234a81 --- /dev/null +++ b/runtime/neurun/backend/srcn/operand/Tensor.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_SRCN_OPERAND_TENSOR_H__ +#define __NEURUN_BACKEND_SRCN_OPERAND_TENSOR_H__ + +#include <backend/operand/ITensor.h> +#include <ir/Layout.h> +#include "ir/OperandInfo.h" + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ +namespace operand +{ + +class Tensor : public ::neurun::backend::operand::ITensor +{ +public: + Tensor() = delete; + +public: + Tensor(const ir::OperandInfo &info, ir::Layout layout) : _info(info), _layout(layout) + { + // DO NOTHING + } + +public: + void setBuffer(uint8_t *buffer) { _buffer = buffer; } + ir::DataType data_type() const { return _info.typeInfo().type(); } + +public: + uint8_t *buffer() const override { return _buffer; } + /** + * @brief Get dimension by index + * + * @param index Index to get diemension + * @return size_t Dimension at index + * @note N : dimension(0) + * H : dimension(1) + * W : dimension(2) + * C : dimension(3) + */ + size_t dimension(size_t index) const override { return _info.shape().dim(index); } + size_t num_dimensions() const override { return _info.shape().rank(); } + size_t total_size() const override { return _info.total_size(); } + size_t calcOffset(const neurun::util::Coordinates &coords) const override; + ir::Layout layout() const override { return _layout; } + bool has_padding() const override { return false; } + void access(const std::function<void(ITensor &tensor)> &fn) final; + +private: + ir::OperandInfo _info; + uint8_t *_buffer = nullptr; + ir::Layout _layout; +}; + +} // namespace operand +} // namespace srcn +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_SRCN_OPERAND_TENSOR_H__ diff --git a/runtime/neurun/core/CMakeLists.txt b/runtime/neurun/core/CMakeLists.txt new file mode 100644 index 000000000..422d292bb --- /dev/null +++ b/runtime/neurun/core/CMakeLists.txt @@ -0,0 +1,18 @@ +file(GLOB_RECURSE SOURCES "src/*.cc") 
+ +add_library(neurun_core SHARED ${SOURCES}) +set_target_properties(neurun_core PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(neurun_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_include_directories(neurun_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) +target_link_libraries(neurun_core PUBLIC nnfw_lib_misc) +target_link_libraries(neurun_core PUBLIC nnfw_lib_cpp14) +target_link_libraries(neurun_core PRIVATE nnfw_lib_cker) +target_link_libraries(neurun_core PRIVATE nnfw_common) +target_link_libraries(neurun_core PRIVATE nnfw_coverage) +target_link_libraries(neurun_core PRIVATE dl ${LIB_PTHREAD}) + +if(ENVVAR_NEURUN_CONFIG) + target_compile_definitions(neurun_core PRIVATE ENVVAR_FOR_DEFAULT_CONFIG) +endif(ENVVAR_NEURUN_CONFIG) + +install(TARGETS neurun_core DESTINATION lib) diff --git a/runtime/neurun/core/include/backend/Backend.h b/runtime/neurun/core/include/backend/Backend.h new file mode 100644 index 000000000..9c4484fca --- /dev/null +++ b/runtime/neurun/core/include/backend/Backend.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_BACKEND_H__ +#define __NEURUN_BACKEND_BACKEND_H__ + +#include <memory> + +#include "ir/Operands.h" + +namespace neurun +{ +namespace backend +{ + +namespace custom +{ +class IKernelBuilder; +} + +class Backend; +struct IConfig; +class IConstantInitializer; +class IKernelGenerator; +class IShapeFixer; +class ITensorRegister; +struct ITensorBuilder; + +class BackendContext +{ +public: + const Backend *backend; + std::shared_ptr<ITensorBuilder> tensor_builder; + std::shared_ptr<IConstantInitializer> constant_initializer; + std::shared_ptr<IKernelGenerator> kernel_gen; + std::shared_ptr<IShapeFixer> shape_fixer; + std::shared_ptr<ITensorRegister> tensor_register; +}; + +class Backend +{ +public: + virtual ~Backend() = default; + virtual std::shared_ptr<neurun::backend::IConfig> config() const = 0; + + virtual std::unique_ptr<BackendContext> + newContext(const ir::Operands &operands, + const std::shared_ptr<backend::custom::IKernelBuilder> &kb) const = 0; +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_BACKEND_H__ diff --git a/runtime/neurun/core/include/backend/CustomKernelBuilder.h b/runtime/neurun/core/include/backend/CustomKernelBuilder.h new file mode 100644 index 000000000..848ebd595 --- /dev/null +++ b/runtime/neurun/core/include/backend/CustomKernelBuilder.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CUSTOM_KERNEL_BUILDER_H__ +#define __NEURUN_BACKEND_CUSTOM_KERNEL_BUILDER_H__ + +#include "exec/IFunction.h" + +#include "misc/tensor/Shape.h" +#include "ir/DataType.h" + +#include <vector> +#include <memory> + +namespace neurun +{ +namespace backend +{ +namespace custom +{ + +using Shape = nnfw::misc::tensor::Shape; + +struct TypeInfo +{ + Shape shape; + ir::DataType dtype; +}; + +struct CustomKernelConfigParams +{ + std::vector<void *> input_allocations; + std::vector<TypeInfo> input_types; + + std::vector<void *> output_allocations; + std::vector<TypeInfo> output_types; + + char *userdata; + size_t userdata_size; +}; + +class IKernelBuilder +{ +public: + virtual ~IKernelBuilder() = default; + virtual std::unique_ptr<exec::IFunction> buildKernel(const std::string &id, + CustomKernelConfigParams &¶ms) const = 0; +}; + +} // namespace custom + +} // namespace backend + +} // namespace neurun + +#endif // __NEURUN_BACKEND_CUSTOM_KERNEL_BUILDER_H__ diff --git a/runtime/neurun/core/include/backend/ExecTime.h b/runtime/neurun/core/include/backend/ExecTime.h new file mode 100644 index 000000000..4eaf49fab --- /dev/null +++ b/runtime/neurun/core/include/backend/ExecTime.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_EXEC_TIME_H__ +#define __NEURUN_BACKEND_EXEC_TIME_H__ + +#include "backend/Backend.h" +#include "backend/IConfig.h" +#include "JSONExecTime.h" +#include <memory> +#include <limits> +#include <map> +#include <unordered_map> +#include <vector> + +namespace neurun +{ +namespace backend +{ +class ExecTime +{ +public: + explicit ExecTime(const std::vector<const Backend *> &backends) : _json(backends, _measurements) + { + } + +public: + /** + * @brief Get exec time of an operation with input size + * or linearly interpolated value based on size if there is no record for given size + * + * @param[in] backend id of a backend + * @param[in] operation name of an operation + * @param[in] quant if input type quantized + * @param[in] op_size sum of operation's flattened sizes of inputs and outputs + * @return execution time for given input sizes + * -1 if there are no records for given parameters (backend, op, quantization). + */ + int64_t getOperationExecTime(const Backend *backend, const std::string &operation, bool quant, + uint32_t op_size) const; + /** + * @brief Update exec time of the operation on a backend with given input size or + * add new entity if there is no one. 
+ * + * @param[in] backend id of a backend + * @param[in] operation name of an operation + * @param[in] quant if input type quantized + * @param[in] op_size sum of operation's flattened sizes of inputs and outputs + * @param[in] time real measured value + */ + void updateOperationExecTime(const Backend *backend, const std::string &operation, bool quant, + uint32_t op_size, int64_t time); + /** + * @brief Get the permute time from one backend to another + * + * @param[in] from_backend + * @param[in] to_backend + * @param[in] quant if input type quantized + * @param[in] op_size sum of operation's flattened sizes of inputs and outputs + * @return permutation time for operation size + */ + int64_t getPermuteTime(const Backend *from_backend, const Backend *to_backend, bool quant, + uint32_t op_size) const; + /** + * @brief Update permute time from one backend to another + * + * @param[in] from_backend + * @param[in] to_backend + * @param[in] quant if input type quantized + * @param[in] time measured permutation time + * @param[in] op_size sum of operation's flattened sizes of inputs and outputs + */ + void updatePermuteTime(const Backend *from_backend, const Backend *to_backend, bool quant, + uint32_t op_size, int64_t time); + /** + * @brief Get the max value of int32_t in int64_t + * @return max value + */ + static int64_t getMax() { return _MAX; } + /** + * @brief Update metrics file with new data. 
+ */ + void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); } + static const int64_t NOT_FOUND = -1; + +private: + /// @brief Measurement data, which is shared with serializer + MeasurementData _measurements; + // int64_t::max may cause integer overflow + static const int64_t _MAX = std::numeric_limits<int32_t>::max(); + /// @brief Serializer + JSON _json; +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_EXEC_TIME_H__ diff --git a/runtime/neurun/core/include/backend/IConfig.h b/runtime/neurun/core/include/backend/IConfig.h new file mode 100644 index 000000000..855f31ef9 --- /dev/null +++ b/runtime/neurun/core/include/backend/IConfig.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ICONFIG_H__ +#define __NEURUN_BACKEND_ICONFIG_H__ + +#include "util/ITimer.h" +#include <memory> +#include <string> + +namespace neurun +{ +namespace backend +{ + +struct IConfig +{ + virtual ~IConfig() = default; + + virtual std::string id() = 0; + virtual bool initialize() = 0; + // Support permute kernel + virtual bool SupportPermutation() = 0; + // Support subtensor allocation + virtual bool SupportSubTensorAlloc() = 0; + + // Timer is used for backend profiling. In case of default (nullptr) timer profiler won't work. 
+ virtual std::unique_ptr<util::ITimer> timer() { return nullptr; } +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ICONFIG_H__ diff --git a/runtime/neurun/core/include/backend/IConstantInitializer.h b/runtime/neurun/core/include/backend/IConstantInitializer.h new file mode 100644 index 000000000..3cc770b29 --- /dev/null +++ b/runtime/neurun/core/include/backend/IConstantInitializer.h @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ICONSTANT_INITIALIZER_H__ +#define __NEURUN_BACKEND_ICONSTANT_INITIALIZER_H__ + +#include <unordered_map> +#include <functional> + +#include "ITensorBuilder.h" +#include "ir/Layout.h" +#include "ir/Operand.h" +#include "ir/Operands.h" +#include "ir/OperationVisitor.h" +#include "ir/OpSequence.h" +#include "util/logging.h" +#include "util/Utils.h" + +namespace +{ +template <typename T> +static void Init(const neurun::ir::Operand &model_obj, neurun::backend::operand::ITensor &obj, + const bool copy, + const neurun::ir::Layout frontend_layout = neurun::ir::Layout::UNKNOWN) +{ + const auto shape = model_obj.shape(); + auto base = reinterpret_cast<const T *>(model_obj.data().base()); + + obj.access([&](::neurun::backend::operand::ITensor &tensor) { + switch (shape.rank()) + { + case 0: + { + assert(model_obj.data().size() == sizeof(T)); + const auto value = *reinterpret_cast<const T *>(base); + T *into = reinterpret_cast<T *>(tensor.buffer()); + *into = value; + break; + } + case 1: + { + auto vec_size = shape.dim(0); + for (int32_t n = 0; n < vec_size; ++n) + { + const T *from = reinterpret_cast<const T *>(base) + n; + const auto value = *from; + + T *into = reinterpret_cast<T *>(tensor.buffer()) + n; + + *into = value; + } + break; + } + case 2: + { + const int32_t copy_len = shape.dim(1); + + for (auto i = 0; i < shape.dim(0); ++i) + { + neurun::util::Coordinates coords{i, 0}; + memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len, + copy_len * sizeof(T)); + } + break; + } + case 3: + { + const int32_t width = shape.dim(1); + const int32_t copy_len = shape.dim(2); + + for (auto i = 0; i < shape.dim(0); ++i) + { + for (auto j = 0; j < shape.dim(1); ++j) + { + neurun::util::Coordinates coords{i, j, 0}; + memcpy(tensor.buffer() + tensor.calcOffset(coords), + base + i * width * copy_len + j * copy_len, copy_len * sizeof(T)); + } + } + break; + } + case 4: + { + const int32_t height = shape.dim(1); + const int32_t width 
= shape.dim(2); + const int32_t copy_len = shape.dim(3); + for (auto i = 0; i < shape.dim(0); ++i) + { + for (auto j = 0; j < shape.dim(1); ++j) + { + for (auto k = 0; k < shape.dim(2); ++k) + { + if (copy) + { + neurun::util::Coordinates coords{i, j, k, 0}; + memcpy(tensor.buffer() + tensor.calcOffset(coords), + base + i * height * width * copy_len + j * width * copy_len + k * copy_len, + copy_len * sizeof(T)); + } + else + { + for (auto l = 0; l < shape.dim(3); ++l) + { + const auto coords = neurun::util::convertCoordinates( + {i, j, k, l}, frontend_layout, tensor.layout()); + T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords)); + T value = *(base + i * height * width * copy_len + j * width * copy_len + + k * copy_len + l); + *into = value; + } + } + } + } + } + break; + } + default: + throw std::runtime_error{"Not yet supported"}; + } + }); +} + +template <typename T> +void copyInit(const neurun::ir::Operand &model_obj, neurun::backend::operand::ITensor &obj) +{ + Init<T>(model_obj, obj, true); +} + +template <typename T> +void permuteInit(const neurun::ir::Operand &model_obj, neurun::backend::operand::ITensor &obj, + const neurun::ir::Layout frontend_layout) +{ + const bool copy = frontend_layout == obj.layout(); + Init<T>(model_obj, obj, copy, frontend_layout); +} + +} // namespace + +namespace neurun +{ +namespace backend +{ + +class IConstantInitializer : ir::OperationVisitor +{ +public: + virtual ~IConstantInitializer() = default; + +public: + void run() + { + assert(tensor_builder().get()); + for (const auto &it : _init_map) + { + const auto &ind = it.first; + const auto &fn = it.second; + + const auto &model_obj = operands().at(ind); + auto tensor_obj = tensor_builder()->tensorAt(ind); + fn(model_obj, *tensor_obj); + VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl; + } + _init_map.clear(); + } + +public: + using Initializer = std::function<void(const ir::Operand &, backend::operand::ITensor &)>; + 
+ void generate(const ir::OpSequence &subg, const ir::Operands &operands) + { + _current_subg_layout = subg.getLayout(); + subg.accept(*this); + for (const auto &e : subg.operations()) + { + for (const auto &ind : e.node->getInputs()) + { + const auto &obj = operands.at(ind); + if (obj.isConstant() && !exist(ind)) + { + registerPermuteInitializer(ind, obj); + } + } + } + } + +protected: + using OperationVisitor::visit; + +protected: + virtual const ir::Operands &operands() const = 0; + virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0; + +protected: + void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj) + { + // For only CONSTANTS + // TODO Add to check if tensor has been allocated + if (!obj.isConstant()) + return; + + const auto type = obj.typeInfo().type(); + using ir::DataType; + + switch (type) + { + case DataType::FLOAT32: + _init_map[index] = copyInit<float>; + break; + case DataType::INT32: + _init_map[index] = copyInit<int32_t>; + break; + case DataType::UINT32: + _init_map[index] = copyInit<uint32_t>; + break; + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + _init_map[index] = copyInit<uint8_t>; + break; + case DataType::QUANT8_SYMM: + _init_map[index] = copyInit<int8_t>; + break; + default: + throw std::runtime_error("Not supported, yet"); + break; + } + } + +protected: + void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj) + { + // For only CONSTANTS + // TODO Add to check if tensor has been allocated + if (!obj.isConstant()) + return; + + const auto type = obj.typeInfo().type(); + using ir::DataType; + using namespace std::placeholders; + + switch (type) + { + case DataType::FLOAT32: + _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_subg_layout); + break; + case DataType::INT32: + _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_subg_layout); + break; + case DataType::UINT32: + _init_map[index] = 
std::bind(permuteInit<uint32_t>, _1, _2, _current_subg_layout); + break; + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_subg_layout); + break; + case DataType::QUANT8_SYMM: + _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_subg_layout); + break; + default: + throw std::runtime_error("Not supported, yet"); + break; + } + } + +private: + bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); } + +protected: + std::unordered_map<ir::OperandIndex, Initializer> _init_map; + ir::Layout _current_subg_layout; +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ICONSTANT_INITIALIZER_H__ diff --git a/runtime/neurun/core/include/backend/IKernelGenerator.h b/runtime/neurun/core/include/backend/IKernelGenerator.h new file mode 100644 index 000000000..4540e81d2 --- /dev/null +++ b/runtime/neurun/core/include/backend/IKernelGenerator.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_IKERNEL_GENERATOR_H__ +#define __NEURUN_BACKEND_IKERNEL_GENERATOR_H__ + +#include <memory> +#include <functional> + +#include "ITensorBuilder.h" +#include "compiler/IExecutionBuilder.h" +#include "ir/OperationVisitor.h" +#include "ir/OpSequence.h" +#include "cpp14/memory.h" + +namespace neurun +{ +namespace backend +{ + +class IKernelGenerator : public ir::OperationVisitor +{ +public: + virtual ~IKernelGenerator() = default; + + void generate(const ir::OpSequence &subg, neurun::compiler::IExecutionBuilder *executionBuilder) + { + _execution_builder = executionBuilder; + subg.accept(*this); + } + +protected: + using OperationVisitor::visit; + +#define OP(InternalName) \ + void visit(const ir::operation::InternalName &) override \ + { \ + throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \ + } +#include "ir/Operations.lst" +#undef OP + +protected: + neurun::compiler::IExecutionBuilder *_execution_builder; +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_IKERNEL_GENERATOR_H__ diff --git a/runtime/neurun/core/include/backend/IMemoryManager.h b/runtime/neurun/core/include/backend/IMemoryManager.h new file mode 100644 index 000000000..b06bab872 --- /dev/null +++ b/runtime/neurun/core/include/backend/IMemoryManager.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_IMEMORY_MANAGER_H__ +#define __NEURUN_BACKEND_IMEMORY_MANAGER_H__ + +namespace neurun +{ +namespace backend +{ + +struct IMemoryManager +{ + virtual ~IMemoryManager() = default; + + virtual void allocate(void) = 0; + virtual void deallocate(void) = 0; +}; + +} // namespace backend +} // namespace neurun + +#include <unordered_set> +#include <memory> + +namespace neurun +{ +namespace backend +{ + +using MemoryManagerSet = std::unordered_set<std::unique_ptr<backend::IMemoryManager>>; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_IMEMORY_MANAGER_H__ diff --git a/runtime/neurun/core/include/backend/IShapeFixer.h b/runtime/neurun/core/include/backend/IShapeFixer.h new file mode 100644 index 000000000..8168e2a11 --- /dev/null +++ b/runtime/neurun/core/include/backend/IShapeFixer.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ISHAPE_FIXER_H__ +#define __NEURUN_BACKEND_ISHAPE_FIXER_H__ + +#include <memory> +#include <functional> + +#include "ir/LowerInfoMap.h" +#include "ITensorBuilder.h" +#include "ir/OperationVisitor.h" +#include "ir/OpSequence.h" +#include "cpp14/memory.h" + +namespace neurun +{ +namespace backend +{ + +class IShapeFixer : public ir::OperationVisitor +{ +public: + virtual ~IShapeFixer() = default; + +protected: +#define OP(InternalName) \ + void visit(const ir::operation::InternalName &) override \ + { \ + throw std::runtime_error("ShapeFixer: NYI for operation '" #InternalName "'"); \ + } +#include "ir/Operations.lst" +#undef OP + +public: + void fix(const ir::OpSequence &op_seq) { op_seq.accept(*this); }; +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ISHAPE_FIXER_H__ diff --git a/runtime/neurun/core/include/backend/ITensorBuilder.h b/runtime/neurun/core/include/backend/ITensorBuilder.h new file mode 100644 index 000000000..5eb4ab2d8 --- /dev/null +++ b/runtime/neurun/core/include/backend/ITensorBuilder.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ITENSOR_BUILDER_H__ +#define __NEURUN_BACKEND_ITENSOR_BUILDER_H__ + +#include <map> + +#include "ir/Index.h" +#include "ir/OperandInfo.h" +#include "ir/Operation.h" +#include "ir/Layout.h" +#include "operand/ITensor.h" +#include "compiler/SubTensorInfo.h" +#include "ITensorManager.h" + +namespace neurun +{ +namespace backend +{ + +struct ITensorBuilder +{ + using IterateFunction = std::function<void(const ir::OperandIndex &)>; + + virtual ~ITensorBuilder(void) = default; + + // TODO Merge registerTensorInfo and registerSubTensorInfo using abstraction by internal class + /** + * @brief Register tensor information to allocate on backend + */ + virtual void registerTensorInfo(const ir::OperandIndex &, const ir::OperandInfo &, + ir::Layout backend_layout, bool as_const) = 0; + /** + * @brief Register subtensor information to allocate on backend + */ + virtual void registerSubTensorInfo(const ir::OperandIndex &, const compiler::SubTensorInfo &) = 0; + + virtual void notifyFirstUse(const ir::OperandIndex &) = 0; + virtual void notifyLastUse(const ir::OperandIndex &) = 0; + + virtual bool isRegistered(const ir::OperandIndex &) const = 0; + + virtual void prepare(void) = 0; + virtual void allocateConsts() = 0; + virtual void allocateNonconsts() = 0; + virtual void postFunctionPrepare() = 0; + virtual void finalize() = 0; + + virtual std::shared_ptr<::neurun::backend::operand::ITensor> + tensorAt(const ir::OperandIndex &ind) = 0; + virtual void iterate(const IterateFunction &fn) = 0; + + virtual void preVisit(const ir::Operation &) = 0; + virtual void postVisit(const ir::Operation &) = 0; + + virtual std::unique_ptr<ITensorManager> releaseTensorManager(void) = 0; +}; + +} // namespace backend +} // namespace neurun + +#include <unordered_set> +#include <memory> + +namespace neurun +{ +namespace backend +{ + +using TensorBuilderSet = std::unordered_set<std::shared_ptr<backend::ITensorBuilder>>; + +} // namespace backend +} // namespace neurun 
+ +#endif // __NEURUN_BACKEND_ITENSOR_BUILDER_H__ diff --git a/runtime/neurun/core/include/backend/ITensorManager.h b/runtime/neurun/core/include/backend/ITensorManager.h new file mode 100644 index 000000000..74506ef59 --- /dev/null +++ b/runtime/neurun/core/include/backend/ITensorManager.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ITENSOR_MANAGER_H__ +#define __NEURUN_BACKEND_ITENSOR_MANAGER_H__ + +namespace neurun +{ +namespace backend +{ + +// NOTE This name ITensorManager has been discussed whether or not the name is proper. +// Anyone can argue with any better name. 
+/** + * @brief Interface as an abstract tensor manager which has MemoryManager + */ +struct ITensorManager +{ + virtual ~ITensorManager() = default; + + virtual void allocateConsts(void) = 0; + virtual void allocateNonconsts(void) = 0; + virtual void deallocateConsts(void) = 0; + virtual void deallocateNonconsts(void) = 0; +}; + +} // namespace backend +} // namespace neurun + +#include <unordered_set> +#include <memory> + +namespace neurun +{ +namespace backend +{ + +using TensorManagerSet = std::unordered_set<std::unique_ptr<backend::ITensorManager>>; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ITENSOR_MANAGER_H__ diff --git a/runtime/neurun/core/include/backend/ITensorRegister.h b/runtime/neurun/core/include/backend/ITensorRegister.h new file mode 100644 index 000000000..9e554ab77 --- /dev/null +++ b/runtime/neurun/core/include/backend/ITensorRegister.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_ITENSOR_REGISTER_H__ +#define __NEURUN_BACKEND_ITENSOR_REGISTER_H__ + +#include "compiler/SubTensorInfo.h" +#include "ir/LowerInfoMap.h" +#include "ir/operand/ParentInfo.h" +#include "ITensorBuilder.h" +#include "ir/Layout.h" +#include "ir/OperandIndexSequence.h" +#include "ir/OperandInfo.h" +#include "ir/Operands.h" +#include "ir/OperationVisitor.h" + +namespace +{ + +neurun::ir::Shape permuteTensorShape(const neurun::ir::Shape &shape, + neurun::ir::Layout frontend_layout, + neurun::ir::Layout backend_layout) +{ + assert(shape.rank() <= 4); + neurun::ir::Shape backend_shape{shape}; + if (shape.rank() == 4 && frontend_layout == neurun::ir::Layout::NHWC && + backend_layout == neurun::ir::Layout::NCHW) + { + backend_shape.dim(1) = shape.dim(3); + backend_shape.dim(2) = shape.dim(1); + backend_shape.dim(3) = shape.dim(2); + } + else if (shape.rank() == 4 && frontend_layout == neurun::ir::Layout::NCHW && + backend_layout == neurun::ir::Layout::NHWC) + { + backend_shape.dim(1) = shape.dim(2); + backend_shape.dim(2) = shape.dim(3); + backend_shape.dim(3) = shape.dim(1); + } + return backend_shape; +} +} // namespace + +namespace neurun +{ +namespace backend +{ + +class ITensorRegister : public ir::OperationVisitor +{ +public: + virtual ~ITensorRegister() = default; + +public: + void registerTensors(const ir::OpSequence &subg, const ir::LowerInfoMap *lower_info_map) + { + _current_subg_layout = subg.getLayout(); + _lower_info_map = lower_info_map; + assert(_lower_info_map != nullptr); + assert(tensor_builder().get() != nullptr); + subg.accept(*this); + } + +protected: + virtual const ir::Operands &operands() const = 0; + virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0; + virtual bool supportSubTensor() const = 0; + +protected: +#define OP(InternalName) \ + void visit(const ir::operation::InternalName &node) override \ + { \ + ir::OperandIndexSequence indices{node.getInputs()}; \ + indices.append(node.getOutputs()); \ + 
for (const auto &index : indices) \ + { \ + defaultRegisterTensorInfo(index); \ + } \ + } +#include "ir/Operations.lst" +#undef OP + +protected: + void defaultRegisterTensorInfo(const ir::OperandIndex &index) const + { + if (tensor_builder()->isRegistered(index)) + { + return; + } + + const auto &obj = operands().at(index); + const auto frontend_layout = frontendLayout(); + const auto backend_layout = backendLayout(index); + if (supportSubTensor() && obj.parent_info() != nullptr) + { + tensor_builder()->registerSubTensorInfo( + index, generateSubTensorInfo(obj, frontend_layout, backend_layout)); + } + else + { + ir::OperandInfo backend_info{permuteTensorShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo()}; + tensor_builder()->registerTensorInfo(index, backend_info, backend_layout, obj.isConstant()); + } + } + +protected: + ir::Layout frontendLayout() const { return _current_subg_layout; } + ir::Layout backendLayout(const ir::OperandIndex &index) const + { + assert(_lower_info_map != nullptr); + const auto lower_info = _lower_info_map->operand.at(index).get(); + return lower_info->def_factors().getOnlyElement().layout(); + } + +private: + compiler::SubTensorInfo generateSubTensorInfo(const ir::Operand &obj, ir::Layout frontend_layout, + ir::Layout backend_layout) const + { + assert(obj.shape().rank() <= 4); + const auto parent_index = obj.parent_info()->parent(); + auto shape = obj.shape(); + auto offset = obj.parent_info()->offset(); + if (operands().at(parent_index).shape().rank() == 4 && frontend_layout == ir::Layout::NHWC && + backend_layout == ir::Layout::NCHW) + { + shape.extendRank(4); + offset = {offset[0], offset[3], offset[1], offset[2]}; + } + else if (operands().at(parent_index).shape().rank() == 4 && + frontend_layout == ir::Layout::NHWC && backend_layout == ir::Layout::NCHW) + { + shape.extendRank(4); + offset = {offset[0], offset[2], offset[3], offset[1]}; + } + ir::Operand subtensor_obj{permuteTensorShape(shape, frontend_layout, 
backend_layout), + obj.typeInfo()}; + subtensor_obj.parent_info( + nnfw::cpp14::make_unique<ir::operand::ParentInfo>(parent_index, offset)); + return compiler::SubTensorInfo{subtensor_obj}; + } + +private: + ir::Layout _current_subg_layout; + const ir::LowerInfoMap *_lower_info_map{nullptr}; +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ITENSOR_REGISTER_H__ diff --git a/runtime/neurun/core/include/backend/JSONExecTime.h b/runtime/neurun/core/include/backend/JSONExecTime.h new file mode 100644 index 000000000..84505e10f --- /dev/null +++ b/runtime/neurun/core/include/backend/JSONExecTime.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_JSON_EXEC_TIME_H__ +#define __NEURUN_BACKEND_JSON_EXEC_TIME_H__ + +#include <fstream> +#include <unordered_map> +#include <map> +#include <vector> +#include "backend/Backend.h" +#include "backend/IConfig.h" + +namespace neurun +{ +namespace backend +{ + +/** + * @brief table, that contains execution time of an operation on some backend for different input + * sizes and transfer time from one backend to another for various input sizes (permutation time) + * + * backend -> op -> quant-> size --> time + * _measurements[Backend*]["string"][bool][uint32_t] = int64_t + */ +using MeasurementData = std::unordered_map< + const Backend *, + std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>; + +class JSON +{ +public: + explicit JSON(const std::vector<const Backend *> &backends, MeasurementData &measurements) + : _measurement_file("exec_time.json"), _backends(), _measurements(measurements) + { + for (const auto b : backends) + { + _backends.emplace(b->config()->id(), b); + } + loadOperationsExecTime(); + }; + /** + * @brief Update _operations_exec_time_file with new data. 
+ */ + void uploadOperationsExecTime() const; + +private: + ///@brief file containing measurements + std::string _measurement_file; + std::unordered_map<std::string, const Backend *> _backends; + std::unordered_map< + const Backend *, + std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>> + &_measurements; + /** + * @brief Helper function for inserting data to OperationExecTimes + * + * @param backend String name of backend + * @param operation String name of operation + * @param quant if input type quantized + * @param stream File stream + */ + void readOperation(const std::string &backend, const std::string &operation, bool quant, + std::ifstream &stream); + + /** + * @brief Helper function for writing OperationExecTimes to stream + * + * @param operation_info Map of operations execution information + * @param stream File stream + */ + void printOperation(const std::map<uint32_t, int64_t> &operation_info, + std::ofstream &stream) const; + /** + * @brief Parse and load operations_exec_time from _operations_exec_time_file. + */ + void loadOperationsExecTime(); +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_JSON_EXEC_TIME_H__ diff --git a/runtime/neurun/core/include/backend/operand/ITensor.h b/runtime/neurun/core/include/backend/operand/ITensor.h new file mode 100644 index 000000000..c278b01a0 --- /dev/null +++ b/runtime/neurun/core/include/backend/operand/ITensor.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_OPERAND_I_TENSOR_H__ +#define __NEURUN_BACKEND_OPERAND_I_TENSOR_H__ + +#include <cstring> +#include <cstdint> +#include <functional> + +#include "ir/Layout.h" +#include "util/Coordinates.h" + +namespace neurun +{ +namespace backend +{ +namespace operand +{ + +class ITensor +{ +public: + virtual ~ITensor() = default; + +public: + virtual uint8_t *buffer() const = 0; + virtual size_t total_size() const = 0; + virtual size_t dimension(size_t index) const = 0; + virtual size_t num_dimensions() const = 0; + virtual size_t calcOffset(const neurun::util::Coordinates &coords) const = 0; + virtual ir::Layout layout() const = 0; + virtual bool has_padding() const = 0; + virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0; +}; + +} // namespace operand +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_OPERAND_I_TENSOR_H__ diff --git a/runtime/neurun/core/include/compiler/Compiler.h b/runtime/neurun/core/include/compiler/Compiler.h new file mode 100644 index 000000000..797fc77ad --- /dev/null +++ b/runtime/neurun/core/include/compiler/Compiler.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file Compiler.h + * @brief This file contains Compiler class to define and run compilation phase + */ + +#ifndef __NEURUN_COMPILER_COMPILE_H_ +#define __NEURUN_COMPILER_COMPILE_H_ + +#include "ir/Graph.h" +#include "exec/IExecutor.h" + +namespace neurun +{ + +namespace compiler +{ + +enum class State +{ + CREATED, // Before compilation + STARTED, // Compile is started + LOWERED, // Backend is decided + COMPILED // Success compilation +}; + +/** + * @brief Class to compile graph model + */ +class Compiler +{ +public: + /** + * @brief Construct a new Compiler object + * @param[in] model Graph model + */ + Compiler(const std::shared_ptr<ir::Graph> &graph) + : _graph{graph}, _executor{nullptr}, _state{State::CREATED} + { + // DO NOTHING + } + +public: + /** + * @brief Run compilation. 
Compilation result will be saved in _plan + */ + void compile(void); + /** + * @brief Pass plan reference + * @param[out] plan Plan reference to return\n + * Set nullptr if compile is not run yet + */ + void release(std::shared_ptr<exec::IExecutor> &executor) { executor = _executor; } + + void state(State state) { _state = state; } + State state(void) const { return _state; } + + /** + * @brief Check if model can compile + * @return @c true if model can compile, otherwise @c false + * @note This method don't check model correctness,\n + * so model verification should be done before calling this method + */ + bool checkCompilable(); + +private: + std::shared_ptr<ir::Graph> _graph; + std::shared_ptr<exec::IExecutor> _executor; + State _state; +}; + +} // namespace compiler + +} // namespace neurun + +#endif // __NEURUN_COMPILER_COMPILE_H_ diff --git a/runtime/neurun/core/include/compiler/IExecutionBuilder.h b/runtime/neurun/core/include/compiler/IExecutionBuilder.h new file mode 100644 index 000000000..c5a06fec0 --- /dev/null +++ b/runtime/neurun/core/include/compiler/IExecutionBuilder.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_COMPILER_I_EXECUTION_BUILDER_H__ +#define __NEURUN_COMPILER_I_EXECUTION_BUILDER_H__ + +#include <memory> + +#include "exec/IFunction.h" + +namespace neurun +{ +namespace compiler +{ + +struct IExecutionBuilder +{ + virtual ~IExecutionBuilder() = default; + + virtual void append(std::unique_ptr<::neurun::exec::IFunction> &&f) = 0; +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_COMPILER_I_EXECUTION_BUILDER_H__ diff --git a/runtime/neurun/core/include/compiler/SubTensorInfo.h b/runtime/neurun/core/include/compiler/SubTensorInfo.h new file mode 100644 index 000000000..18cab466b --- /dev/null +++ b/runtime/neurun/core/include/compiler/SubTensorInfo.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file SubTensorInfo.h + * @brief This file contains SubTensorInfo to represent subsumption between tensors + * for backend tensor allocation + */ +#ifndef __NEURUN_COMPILER_SUBTENSOR_INFO_H__ +#define __NEURUN_COMPILER_SUBTENSOR_INFO_H__ + +#include "ir/Operand.h" + +namespace neurun +{ +namespace compiler +{ + +/** + * @brief Class to represent information of subtensor + */ +class SubTensorInfo +{ +public: + SubTensorInfo() = delete; + + /** + * @brief Construct a new SubTensorInfo object + * @param[in] obj SubTensor object + */ + SubTensorInfo(const ir::Operand &obj) + : _parent{obj.parent_info()->parent()}, _shape{obj.shape()}, _type{obj.typeInfo()}, + _offset{obj.parent_info()->offset()} + { + // DO NOTHING + } + +public: + /** + * @brief Return parent tensor index + * @return Parent tensor index + */ + const ir::OperandIndex parent(void) const { return _parent; } + /** + * @brief Return tensor shape + * @return Tensor shape + */ + const ir::Shape &shape(void) const { return _shape; } + /** + * @brief Return tensor type + * @return Tensor type + */ + const ir::TypeInfo &type(void) const { return _type; } + /** + * @brief Return tensor's offset in parent tensor + * @return Tensor offset + */ + const neurun::util::Coordinates offset(void) const { return _offset; } + +private: + const ir::OperandIndex _parent; + const ir::Shape _shape; + const ir::TypeInfo _type; + const neurun::util::Coordinates _offset; +}; + +} // compiler +} // neurun + +#endif // __NEURUN_COMPILER_SUBTENSOR_INFO_H__ diff --git a/runtime/neurun/core/include/exec/Execution.h b/runtime/neurun/core/include/exec/Execution.h new file mode 100644 index 000000000..7304f8aab --- /dev/null +++ b/runtime/neurun/core/include/exec/Execution.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file Execution.h + * @brief This file defines execution + */ +#ifndef __NEURUN_EXEC_EXECUTION_H__ +#define __NEURUN_EXEC_EXECUTION_H__ + +#include "ir/Layout.h" +#include "exec/IExecutor.h" +#include "IODescription.h" + +#include <thread> + +namespace neurun +{ +namespace exec +{ + +/** + * @brief Class to define execution instance to collect input/output information for inference + * and prepare executor run (TODO) + */ +class Execution +{ + +public: + /** + * @brief Construct a new Execution object + * @param[in] executor Model executor + */ + Execution(const std::shared_ptr<IExecutor> &executor); + +public: + /** + * @brief Returns graph object + * @return Graph object + */ + const ir::Graph &graph() const { return _executor->graph(); } + /** + * @brief Set input data's information + * @param[in] index Input index + * @param[in] buffer Input data's buffer pointer + * @param[in] length Input data's length + * @param[in] layout Input data's data format + */ + void setInput(const ir::IOIndex &index, const void *buffer, size_t length, + ir::Layout layout = ir::Layout::NHWC); + /** + * @brief Set input data's information, especially to specify unknown dimensions on model + * build time. 
+ * @param[in] index Input index + * @param[in] type Input data's type info + * @param[in] shape Input data's shape + * @param[in] buffer Input data's buffer pointer + * @param[in] length Input data's length + * @param[in] layout Input data's data format + */ + void setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape, + const void *buffer, size_t length, ir::Layout layout = ir::Layout::NHWC); + /** + * @brief Set output data's information + * @param[in] index Output index + * @param[in] buffer Output data's buffer pointer + * @param[in] length Output data's length + * @param[in] layout Output data's data format + */ + void setOutput(const ir::IOIndex &index, void *buffer, size_t length, + ir::Layout layout = ir::Layout::NHWC); + /** + * @brief Set output data's information, especially to specify unknown dimensions on model + * build time. + * @param[in] index Output index + * @param[in] type Output data's type info + * @param[in] shape Output data's shape + * @param[in] buffer Output data's buffer pointer + * @param[in] length Output data's length + * @param[in] layout Output data's data format + */ + void setOutput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape, + void *buffer, size_t length, ir::Layout layout = ir::Layout::NHWC); + /** + * @brief Set input data's data format + * @param[in] index Input index + * @param[in] layout Input data's data format + */ + void setInputLayout(const ir::IOIndex &index, ir::Layout layout); + /** + * @brief Set output data's data format + * @param[in] index Output index + * @param[in] layout Output data's data format + */ + void setOutputLayout(const ir::IOIndex &index, ir::Layout layout); + /** + * @brief Execution + * @note It should be called after setting input and output buffer + */ + void execute(); + + /** + * @brief Start asynchronous execution + * @note It returns after execution thread is started + * It should be called after setting input and output buffer + 
*/ + void startExecute(void); + + /** + * @brief Return when execution is finished + * @note It waits until execution is finished + */ + void waitFinish(void); + + /** + * @brief Check execution is finished + * @return @c true if execution is finished, otherwise @c false + */ + bool isFinished(void) const; + +private: + const std::shared_ptr<IExecutor> _executor; + IODescription _io_desc; + std::unique_ptr<std::thread> _exec_thread; + bool finished{false}; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_EXECUTION_H__ diff --git a/runtime/neurun/core/include/exec/ExecutionObservers.h b/runtime/neurun/core/include/exec/ExecutionObservers.h new file mode 100644 index 000000000..ca658c706 --- /dev/null +++ b/runtime/neurun/core/include/exec/ExecutionObservers.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_OBSREVERS_H__ +#define __NEURUN_EXEC_OBSREVERS_H__ + +#include "exec/IFunction.h" +#include "ir/OpSequence.h" +#include "backend/ExecTime.h" +#include "util/ITimer.h" +#include "IExecutor.h" +#include "misc/EventCollector.h" +#include "misc/EventRecorder.h" + +namespace neurun +{ +namespace exec +{ +class IExecutionObserver +{ +public: + /// @brief Invoked just before model (not individual operation) execution begins + virtual void handleBegin(IExecutor *) { return; } + + virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; + virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; + + /// @brief Invoked just after model (not individual operation) execution ends + virtual void handleEnd(IExecutor *) { return; } + + virtual ~IExecutionObserver() = default; +}; + +class ProfileObserver : public IExecutionObserver +{ +public: + explicit ProfileObserver(std::shared_ptr<backend::ExecTime> et) : _et(std::move(et)) {} + void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; + void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; + + void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); } + +private: + std::unique_ptr<util::ITimer> _timer; + std::shared_ptr<backend::ExecTime> _et; +}; + +class ChromeTracingObserver : public IExecutionObserver +{ +public: + ChromeTracingObserver(const std::string &filepath); + ~ChromeTracingObserver(); + void handleBegin(IExecutor *) override; + void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; + void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; + void handleEnd(IExecutor *) override; + +private: + static std::string subgraphTag(const ir::OpSequence *op_seq); + +private: + std::ofstream _ofs; + EventRecorder _recorder; + EventCollector _collector; +}; + +} // namespace exec +} 
// namespace neurun + +#endif // __NEURUN_EXEC_OBSREVERS_H__ diff --git a/runtime/neurun/core/include/exec/IExecutor.h b/runtime/neurun/core/include/exec/IExecutor.h new file mode 100644 index 000000000..de3291388 --- /dev/null +++ b/runtime/neurun/core/include/exec/IExecutor.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file IExecutor.h + * @brief This file defines interface of Executor + */ +#ifndef __NEURUN_EXEC_I_EXECUTOR_H_ +#define __NEURUN_EXEC_I_EXECUTOR_H_ + +#include "ir/Graph.h" +#include "IFunction.h" +#include "IODescription.h" +#include "ir/OperationIndexMap.h" + +namespace neurun +{ +namespace exec +{ +class IExecutionObserver; +/** + * @brief Struct to define interface of Executor + */ +struct IExecutor +{ + /** + * @brief Construct a new IExecutor object + */ + IExecutor() = default; + /** + * @brief Destroy the IExecutor object + */ + virtual ~IExecutor() = default; + + /** + * @brief Returns graph object + * + * @return Graph object + */ + virtual const ir::Graph &graph() = 0; + + /** + * @brief Set an ordering on operations + * @param[in] ranks The table encoding the ordering + */ + virtual void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) = 0; + + /** + * @brief Start execution + * @param[in] desc Input and output description + * @note This method should be thread-safe + */ + virtual void execute(const 
IODescription &desc) = 0; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_I_EXECUTOR_H_ diff --git a/runtime/neurun/core/include/exec/IFunction.h b/runtime/neurun/core/include/exec/IFunction.h new file mode 100644 index 000000000..5cc29ea75 --- /dev/null +++ b/runtime/neurun/core/include/exec/IFunction.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_I_FUNCTION_H__ +#define __NEURUN_EXEC_I_FUNCTION_H__ + +namespace neurun +{ +namespace exec +{ + +class IFunction +{ +public: + virtual ~IFunction() = default; + virtual void run() = 0; + virtual void runSync() = 0; + virtual void prepare() {} +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_I_FUNCTION_H__ diff --git a/runtime/neurun/core/include/exec/IODescription.h b/runtime/neurun/core/include/exec/IODescription.h new file mode 100644 index 000000000..bdcc78176 --- /dev/null +++ b/runtime/neurun/core/include/exec/IODescription.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_IO_DESCRIPTION_H__ +#define __NEURUN_EXEC_IO_DESCRIPTION_H__ + +#include <vector> + +#include "ir/OperandInfo.h" + +namespace neurun +{ +namespace exec +{ + +struct InputDesc +{ + const ir::OperandInfo info; + const void *buffer; + const size_t size; + const ir::Layout layout; + + InputDesc(void) = delete; + InputDesc(const ir::OperandInfo &info, const void *buffer, const size_t size, ir::Layout layout) + : info(info), buffer(buffer), size(size), layout(layout) + { + } +}; + +struct OutputDesc +{ + const ir::OperandInfo info; + void *buffer; + const size_t size; + const ir::Layout layout; + + OutputDesc(void) = delete; + OutputDesc(const ir::OperandInfo &info, void *buffer, const size_t size, ir::Layout layout) + : info(info), buffer(buffer), size(size), layout(layout) + { + } +}; + +struct IODescription +{ + std::vector<std::unique_ptr<InputDesc>> inputs; + std::vector<std::unique_ptr<OutputDesc>> outputs; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_IO_DESCRIPTION_H__ diff --git a/runtime/neurun/core/include/exec/NopFunction.h b/runtime/neurun/core/include/exec/NopFunction.h new file mode 100644 index 000000000..5cbd7e5ce --- /dev/null +++ b/runtime/neurun/core/include/exec/NopFunction.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file NopFunction.h
+ * @brief This file defines NopFunction
+ */
+#ifndef __NEURUN_EXEC_NOP_FUNCTION_H_
+#define __NEURUN_EXEC_NOP_FUNCTION_H_
+
+#include "IFunction.h"
+
+namespace neurun
+{
+namespace exec
+{
+
+/**
+ * @brief A derivative of IFunction that does nothing
+ *
+ */
+class NopFunction : public IFunction
+{
+public:
+  NopFunction() = default;
+  void run() override
+  {
+    // DO NOTHING
+  }
+  void runSync() override
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+};
+
+} // namespace exec
+} // namespace neurun
+
+#endif // __NEURUN_EXEC_NOP_FUNCTION_H_
diff --git a/runtime/neurun/core/include/ir/BackendSet.h b/runtime/neurun/core/include/ir/BackendSet.h
new file mode 100644
index 000000000..4979286df
--- /dev/null
+++ b/runtime/neurun/core/include/ir/BackendSet.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __NEURUN_IR_BACKEND_SET_H__ +#define __NEURUN_IR_BACKEND_SET_H__ + +#include "util/Set.h" + +namespace neurun +{ +namespace backend +{ +class Backend; +} // namespace backend +} // namespace neurun + +namespace neurun +{ +namespace ir +{ + +using BackendSet = util::Set<const backend::Backend *>; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_BACKEND_SET_H__ diff --git a/runtime/neurun/core/include/ir/Data.h b/runtime/neurun/core/include/ir/Data.h new file mode 100644 index 000000000..a0a489553 --- /dev/null +++ b/runtime/neurun/core/include/ir/Data.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_DATA_H__ +#define __NEURUN_IR_DATA_H__ + +#include <algorithm> + +namespace neurun +{ +namespace ir +{ + +struct Data +{ + virtual ~Data() = default; + + virtual size_t size(void) const = 0; + virtual const uint8_t *base(void) const = 0; +}; + +class CachedData final : public Data +{ +public: + CachedData(const uint8_t *base, size_t size) : _base{new uint8_t[size]}, _size{size} + { + std::copy(base, base + size, _base); + } + +public: + ~CachedData() { delete[] _base; } + +public: + size_t size(void) const override { return _size; } + const uint8_t *base(void) const override { return _base; } + +private: + uint8_t *_base; + size_t _size; +}; + +class ExternalData final : public Data +{ +public: + ExternalData(const uint8_t *base, size_t size) : _base{base}, _size{size} + { + // DO NOTHING + } + +public: + size_t size(void) const override { return _size; } + const uint8_t *base(void) const override { return _base; } + +private: + const uint8_t *_base; + const size_t _size; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_DATA_H__ diff --git a/runtime/neurun/core/include/ir/DataType.h b/runtime/neurun/core/include/ir/DataType.h new file mode 100644 index 000000000..6eca6eb27 --- /dev/null +++ b/runtime/neurun/core/include/ir/DataType.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_DATATYPE_H__ +#define __NEURUN_IR_DATATYPE_H__ + +#include <stdexcept> + +namespace neurun +{ +namespace ir +{ + +enum class DataType +{ + FLOAT32 = 0, + INT32 = 1, + UINT32 = 2, + QUANT8_ASYMM = 3, + BOOL8 = 4, + UINT8 = 5, + QUANT8_SYMM = 6, +}; + +inline size_t sizeOfDataType(DataType data_type) +{ + switch (data_type) + { + case DataType::FLOAT32: + return sizeof(float); + case DataType::INT32: + return sizeof(int32_t); + case DataType::UINT32: + return sizeof(uint32_t); + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + case DataType::UINT8: + return sizeof(uint8_t); + case DataType::QUANT8_SYMM: + return sizeof(int8_t); + default: + throw std::runtime_error{"Unsupported type size"}; + } +} + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_DATATYPE_H__ diff --git a/runtime/neurun/core/include/ir/Graph.h b/runtime/neurun/core/include/ir/Graph.h new file mode 100644 index 000000000..5105c3a42 --- /dev/null +++ b/runtime/neurun/core/include/ir/Graph.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_GRAPH_H__ +#define __NEURUN_IR_GRAPH_H__ + +#include <functional> + +#include "ir/Operands.h" +#include "ir/Operations.h" +#include "ir/LowerInfoMap.h" +#include "ir/OpSequence.h" +#include "ir/Subgraphs.h" + +namespace neurun +{ +namespace compiler +{ +class BackendResolver; +} // namespace compiler +} // namespace neurun + +namespace neurun +{ +namespace backend +{ +namespace custom +{ +class IKernelBuilder; +} // namespace custom +} // namespace backend +} // namespace neurun + +namespace neurun +{ +namespace ir +{ + +class Graph +{ +private: + enum class Phase + { + BUILDING, + MODEL + }; + +public: + Graph(void); + ~Graph(void); + + // Graph Building +public: + OperandIndex addOperand(const Shape &shape, const TypeInfo &type); + OperationIndex addOperation(std::unique_ptr<Operation> &&node); + void setOperandValue(const OperandIndex &ind, std::unique_ptr<Data> &&data); + void addInput(const OperandIndex &ind); + void addOutput(const OperandIndex &ind); + void finishBuilding(void); + void lower(void); + void removeOperand(const OperandIndex &ind) { _operands.remove(ind); } + bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; } + +private: + void initializeUseDef(); + + // Custom operations support +public: + void + bindKernelBuilder(const std::shared_ptr<neurun::backend::custom::IKernelBuilder> &kernel_builder) + { + _kernel_builder = kernel_builder; + } + + const std::shared_ptr<backend::custom::IKernelBuilder> &getKernelBuilder() const + { + return _kernel_builder; + } + +private: + std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; + + // Accessors +public: + const OperandIndexSequence &getInputs() const { return _inputs; } + OperandIndexSequence &getInputs() { return _inputs; } + const OperandIndexSequence &getOutputs() const { return _outputs; } + OperandIndexSequence &getOutputs() { return _outputs; } + const Operands &operands() const { return _operands; } + Operands &operands() { return 
_operands; } // TODO Remove this non-const accessor + const Operations &operations() const { return _operations; } + Operations &operations() { return _operations; } + const compiler::BackendResolver *backend_resolver() const { return _backend_resolver.get(); } + +private: + Phase _phase{Phase::BUILDING}; + Operations _operations; + Operands _operands; + OperandIndexSequence _inputs; + OperandIndexSequence _outputs; + + // For LOWERED phase +public: + const LowerInfoMap *getLowerInfo() const { return _lower_info_map.get(); } + const operation::LowerInfo *getLowerInfo(const SubgraphIndex &subg_index) const; + void setLowerInfo(const SubgraphIndex &subg_index, + std::unique_ptr<operation::LowerInfo> &&lower_info); + void removeLowerInfo(const SubgraphIndex &subg_index); + const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const; + operand::LowerInfo *getLowerInfo(const OperandIndex &index); + void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info); + void removeLowerInfo(const OperandIndex &index); + Subgraphs &subgraphs() + { + assert(_op_seqs); + return *_op_seqs; + } + const Subgraphs *subgraphs() const { return _op_seqs.get(); } + void setBackendResolver(std::unique_ptr<compiler::BackendResolver> &&br); + +private: + void makeSubgraphs(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info); + void + manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info); + void dumpLowerInfo(); + bool mergeable(const SubgraphIndex &subg_index, const OperationIndex &node_index, Layout layout); + SubgraphIndex appendFreshSingleOpSubgraph(const OperationIndex &node_index, const Operation &node, + Layout layout); + +private: + std::unique_ptr<compiler::BackendResolver> _backend_resolver; + std::unique_ptr<LowerInfoMap> _lower_info_map; + // Pass(for Perm) can accept only graph so that Graph has Subgraphs as a member + std::unique_ptr<Subgraphs> _op_seqs; +}; + +} // 
namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_GRAPH_H__ diff --git a/runtime/neurun/core/include/ir/Index.h b/runtime/neurun/core/include/ir/Index.h new file mode 100644 index 000000000..aebc64dcd --- /dev/null +++ b/runtime/neurun/core/include/ir/Index.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERAND_INDEX_H__ +#define __NEURUN_IR_OPERAND_INDEX_H__ + +#include "util/Index.h" + +namespace neurun +{ +namespace ir +{ + +struct OperationIndexTag; +using OperationIndex = ::neurun::util::Index<uint32_t, OperationIndexTag>; + +struct OperandIndexTag; +using OperandIndex = ::neurun::util::Index<uint32_t, OperandIndexTag>; + +struct IOIndexTag; +using IOIndex = ::neurun::util::Index<uint32_t, IOIndexTag>; + +struct SubgraphIndexTag; +using SubgraphIndex = ::neurun::util::Index<uint32_t, SubgraphIndexTag>; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERAND_INDEX_H__ diff --git a/runtime/neurun/core/include/ir/InternalType.h b/runtime/neurun/core/include/ir/InternalType.h new file mode 100644 index 000000000..91085f2f3 --- /dev/null +++ b/runtime/neurun/core/include/ir/InternalType.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_INTERNAL_TYPE_H__ +#define __NEURUN_IR_INTERNAL_TYPE_H__ + +#include <cstdint> + +namespace neurun +{ +namespace ir +{ + +enum class Activation +{ + NONE = 0, + RELU = 1, + RELU1 = 2, + RELU6 = 3, + TANH = 4, + SIGMOID = 5 +}; + +enum class PaddingType +{ + EXPLICIT = 0, + SAME = 1, + VALID = 2 +}; + +struct ExplicitPadding +{ + uint32_t left; + uint32_t right; + uint32_t top; + uint32_t bottom; +}; + +// TODO Resolve explicit padding param at frontend and save in value field +struct Padding +{ + PaddingType type; + ExplicitPadding param; +}; + +struct Stride +{ + uint32_t vertical; + uint32_t horizontal; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_INTERNAL_TYPE_H__ diff --git a/runtime/neurun/core/include/ir/Layout.h b/runtime/neurun/core/include/ir/Layout.h new file mode 100644 index 000000000..c4edb70db --- /dev/null +++ b/runtime/neurun/core/include/ir/Layout.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_LAYOUT_H__ +#define __NEURUN_IR_LAYOUT_H__ + +#include <functional> +#include <string> + +namespace neurun +{ +namespace ir +{ + +enum class Layout +{ + UNKNOWN = 0, + NHWC, + NCHW +}; + +inline std::string to_string(Layout layout) +{ + switch (layout) + { + case Layout::NHWC: + return std::string{"NHWC"}; + case Layout::NCHW: + return std::string{"NCHW"}; + case Layout::UNKNOWN: + return std::string{"UNKNOWN"}; + default: + throw std::runtime_error("WRONG LAYOUT"); + } +} + +} // namespace ir +} // namespace neurun + +namespace std +{ + +template <> struct hash<neurun::ir::Layout> +{ + size_t operator()(neurun::ir::Layout value) const noexcept + { + using type = typename std::underlying_type<neurun::ir::Layout>::type; + return hash<type>()(static_cast<type>(value)); + } +}; + +} // namespace std + +#endif // __NEURUN_IR_LAYOUT_H__ diff --git a/runtime/neurun/core/include/ir/LowerInfoMap.h b/runtime/neurun/core/include/ir/LowerInfoMap.h new file mode 100644 index 000000000..a8fd818b8 --- /dev/null +++ b/runtime/neurun/core/include/ir/LowerInfoMap.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_LOWER_INFO_MAP_H__ +#define __NEURUN_IR_LOWER_INFO_MAP_H__ + +#include <memory> +#include <unordered_map> + +#include "ir/operand/LowerInfo.h" +#include "ir/operation/LowerInfo.h" +#include "ir/OperandIndexMap.h" +#include "ir/Index.h" + +namespace neurun +{ +namespace ir +{ + +struct LowerInfoMap +{ + std::unordered_map<SubgraphIndex, std::unique_ptr<operation::LowerInfo>> operation; + OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operand; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_LOWER_INFO_MAP_H__ diff --git a/runtime/neurun/core/include/ir/OpCode.h b/runtime/neurun/core/include/ir/OpCode.h new file mode 100644 index 000000000..2b466a212 --- /dev/null +++ b/runtime/neurun/core/include/ir/OpCode.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OP_CODE_H__ +#define __NEURUN_IR_OP_CODE_H__ + +#include <functional> +#include <stdint.h> + +namespace neurun +{ +namespace ir +{ + +enum class OpCode +{ + Invalid, //< Unused +#define OP(Name) Name, //< All operations +#include "ir/Operations.lst" +#undef OP + COUNT +}; + +const char *toString(OpCode opcode); + +} // namespace ir +} // namespace neurun + +namespace std +{ + +template <> struct hash<neurun::ir::OpCode> +{ + size_t operator()(neurun::ir::OpCode value) const noexcept + { + using type = typename std::underlying_type<neurun::ir::OpCode>::type; + return hash<type>()(static_cast<type>(value)); + } +}; + +} // namespace std + +#endif // __NEURUN_IR_OP_CODE_H__ diff --git a/runtime/neurun/core/include/ir/OpSequence.h b/runtime/neurun/core/include/ir/OpSequence.h new file mode 100644 index 000000000..68632e589 --- /dev/null +++ b/runtime/neurun/core/include/ir/OpSequence.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OP_SEQUENCE_H__ +#define __NEURUN_IR_OP_SEQUENCE_H__ + +#include <vector> +#include <string> +#include <memory> + +#include "ir/Layout.h" +#include "ir/Index.h" +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ + +// To support ValueSwappable, Element doesn't have members which are classes +// as value(or can have members which are classes as value and the classes +// support Swappable) +struct Element +{ + OperationIndex index; + const Operation *node; + + Element(const OperationIndex *i, const Operation *n) : index{*i}, node{n} + { + // DO NOTHING + } +}; + +class OpSequence +{ +public: + explicit OpSequence(Layout layout); + OpSequence(const OpSequence &) = delete; + +public: + void accept(OperationVisitor &v) const; + +public: + const OperandIndexSequence &getInputs() const { return _inputs; } + const OperandIndexSequence &getOutputs() const { return _outputs; } + void setInputs(const OperandIndexSequence &indexes) { _inputs = indexes; } + void setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; } + void replaceInput(const OperandIndex &from, const OperandIndex &to) { _inputs.replace(from, to); } + void replaceOutput(const OperandIndex &from, const OperandIndex &to) + { + _outputs.replace(from, to); + } + + void appendOperation(const OperationIndex &index, const Operation &node) + { + _operations.emplace_back(&index, &node); + } + + std::vector<Element> &operations(void) { return _operations; } + + const std::vector<Element> &operations(void) const { return _operations; } + + uint32_t size(void) const { return _operations.size(); } + + // TODO: Impl Dumper instead of this method + std::string getStr(void) const; + +public: + void remove(const OperationIndex &index); + +public: + Layout getLayout() const { return _layout; } + +public: + std::vector<Element>::const_iterator begin() const { return _operations.begin(); } + std::vector<Element>::const_iterator end() const { return _operations.end(); } + 
+private: + bool exist(const OperationIndex &index) const; + +private: + OperandIndexSequence _inputs; + OperandIndexSequence _outputs; + std::vector<Element> _operations; + +private: + Layout _layout; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OP_SEQUENCE_H__ diff --git a/runtime/neurun/core/include/ir/Operand.h b/runtime/neurun/core/include/ir/Operand.h new file mode 100644 index 000000000..b1f28de48 --- /dev/null +++ b/runtime/neurun/core/include/ir/Operand.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __NEURUN_IR_OPERAND_H__
+#define __NEURUN_IR_OPERAND_H__
+
+#include <cassert>
+#include <cstdint>
+#include <cpp14/memory.h>
+#include <algorithm>
+
+#include "ir/Data.h"
+#include "ir/DataType.h"
+#include "ir/OperandInfo.h"
+#include "ir/operand/ParentInfo.h" // TODO Remove this dependency
+#include "ir/OperationIndexList.h"
+
+namespace neurun
+{
+namespace ir
+{
+
+class Operand
+{
+public:
+  explicit Operand(const Shape &shape, const TypeInfo &type) : _info{shape, type}
+  {
+    // DO NOTHING
+  }
+
+public:
+  const Shape &shape(void) const { return _info.shape(); }
+  const TypeInfo &typeInfo(void) const { return _info.typeInfo(); }
+  const OperandInfo &info(void) const { return _info; }
+  size_t operandSize(void) const;
+
+  const OperationIndexList &getUses() const { return _uses; }
+  const OperationIndexList &getDef() const { return _def; }
+  void appendUse(const OperationIndex &idx);
+  void removeUse(const OperationIndex &idx);
+  void appendDef(const OperationIndex &idx);
+  void removeDef(const OperationIndex &idx);
+
+public:
+  void type(const DataType type) { _info.type(type); };
+
+public:
+  void data(std::unique_ptr<Data> &&data) { _data = std::move(data); }
+  const Data &data(void) const
+  {
+    assert(_data);
+    return *_data;
+  }
+
+  /**
+   * @brief Get true if Operand has data, otherwise @c false
+   * @return @c true if Operand has data, otherwise @c false
+   */
+  bool isConstant(void) const { return _data != nullptr; }
+
+public:
+  template <typename T, typename... Args> void data(Args &&... args)
+  {
+    data(nnfw::cpp14::make_unique<T>(std::forward<Args>(args)...));
+  }
+
+public:
+  template <typename T> T asScalar(void) const
+  {
+    assert((shape().rank() == 0) || ((shape().rank() == 1) && (shape().dim(0) == 1)));
+    assert(_data != nullptr);
+    assert((_data->base() != nullptr) && (_data->size() == sizeof(T)));
+
+    return *(reinterpret_cast<const T *>(_data->base()));
+  }
+
+  template <typename T> std::vector<T> asVector() const
+  {
+    assert(isConstant());
+    assert(_data->size() % sizeof(T) == 0);
+
+    const auto *base = reinterpret_cast<const T *>(_data->base());
+    const std::size_t size = _data->size() / sizeof(T);
+    return std::vector<T>(base, base + size);
+  }
+
+public:
+  /**
+   * @brief Set parent information
+   * @param[in] parent_info Parent information
+   */
+  void parent_info(std::unique_ptr<operand::ParentInfo> &&parent_info);
+  /**
+   * @brief Return parent information pointer as constant
+   * @return Parent information pointer
+   */
+  const operand::ParentInfo *parent_info() const;
+  /**
+   * @brief Return parent information pointer
+   * @return Parent information pointer
+   */
+  operand::ParentInfo *parent_info();
+
+private:
+  OperandInfo _info;
+  std::shared_ptr<Data> _data;
+
+  OperationIndexList _uses;
+  OperationIndexList _def; // size is 0 (constant) or 1 (from def operation)
+
+  std::shared_ptr<operand::ParentInfo> _parent_info;
+};
+
+} // namespace ir
+} // namespace neurun
+
+#endif // __NEURUN_IR_OPERAND_H__
diff --git a/runtime/neurun/core/include/ir/OperandConstraint.h b/runtime/neurun/core/include/ir/OperandConstraint.h
new file mode 100644
index 000000000..de6f21634
--- /dev/null
+++ b/runtime/neurun/core/include/ir/OperandConstraint.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_MODEL_OPERAND_CONSTRAINT_H__ +#define __NEURUN_MODEL_OPERAND_CONSTRAINT_H__ + +#include <stdint.h> +#include <limits> +#include <set> + +namespace neurun +{ +namespace ir +{ + +class OperandConstraint +{ +private: + static const uint32_t INF = std::numeric_limits<uint32_t>::max(); + +public: + static OperandConstraint createAny() { return OperandConstraint{0u, INF}; } + static OperandConstraint createExact(uint32_t exact) { return OperandConstraint{exact, exact}; } + static OperandConstraint createAtMost(uint32_t end) { return OperandConstraint{0u, end}; } + static OperandConstraint createAtLeast(uint32_t begin) { return OperandConstraint{begin, INF}; } + static OperandConstraint createInRange(uint32_t begin, uint32_t end) + { + return OperandConstraint{begin, end}; + } + +private: + OperandConstraint(uint32_t begin, uint32_t end) : _begin{begin}, _end{end} {} + +public: + bool check(uint32_t ind) const { return _begin <= ind && ind <= _end; } + +private: + uint32_t _begin; + uint32_t _end; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_MODEL_OPERAND_CONSTRAINT_H__ diff --git a/runtime/neurun/core/include/ir/OperandIndexMap.h b/runtime/neurun/core/include/ir/OperandIndexMap.h new file mode 100644 index 000000000..c9234128e --- /dev/null +++ b/runtime/neurun/core/include/ir/OperandIndexMap.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERAND_INDEX_MAP_H__ +#define __NEURUN_IR_OPERAND_INDEX_MAP_H__ + +#include <unordered_map> + +#include "ir/Index.h" + +namespace neurun +{ +namespace ir +{ + +template <typename T> using OperandIndexMap = std::unordered_map<OperandIndex, T>; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERAND_INDEX_MAP_H__ diff --git a/runtime/neurun/core/include/ir/OperandIndexSequence.h b/runtime/neurun/core/include/ir/OperandIndexSequence.h new file mode 100644 index 000000000..7f8cec844 --- /dev/null +++ b/runtime/neurun/core/include/ir/OperandIndexSequence.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __NEURUN_MODEL_OPERAND_INDEX_SEQUENCE_H__
+#define __NEURUN_MODEL_OPERAND_INDEX_SEQUENCE_H__
+
+#include <initializer_list>
+#include <vector>
+
+#include "ir/Index.h"
+
+namespace neurun
+{
+namespace ir
+{
+
+class OperandIndexSequence
+{
+public:
+  OperandIndexSequence(void) = default;
+  OperandIndexSequence(std::initializer_list<OperandIndex> list);
+  OperandIndexSequence(std::initializer_list<int32_t> list);
+  OperandIndexSequence(std::initializer_list<uint32_t> list);
+
+public:
+  void append(const OperandIndex &index) { _set.emplace_back(index); }
+  void append(const OperandIndexSequence &l) { _set.insert(_set.end(), l.begin(), l.end()); }
+
+public:
+  uint32_t size() const { return static_cast<uint32_t>(_set.size()); }
+  const OperandIndex &at(IOIndex set_index) const { return _set.at(set_index.value()); }
+  const OperandIndex &at(uint32_t index) const { return _set.at(index); }
+  bool contains(const OperandIndex &index) const;
+  void replace(const OperandIndex &from, const OperandIndex &to);
+
+public:
+  std::vector<OperandIndex>::const_iterator begin(void) const { return _set.begin(); }
+  std::vector<OperandIndex>::const_iterator end(void) const { return _set.end(); }
+
+private:
+  std::vector<OperandIndex> _set;
+};
+
+} // namespace ir
+} // namespace neurun
+
+#endif // __NEURUN_MODEL_OPERAND_INDEX_SEQUENCE_H__
diff --git a/runtime/neurun/core/include/ir/OperandInfo.h b/runtime/neurun/core/include/ir/OperandInfo.h
new file mode 100644
index 000000000..82ad7ef0f
--- /dev/null
+++ b/runtime/neurun/core/include/ir/OperandInfo.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file OperandInfo.h + * @brief This file contains OperandInfo class + */ +#ifndef __NEURUN_IR_OPERAND_INFO_H__ +#define __NEURUN_IR_OPERAND_INFO_H__ + +#include "ir/Shape.h" +#include "ir/TypeInfo.h" +#include "ir/Layout.h" + +namespace neurun +{ +namespace ir +{ + +/** + * @brief Class to save tensor's shape and type + */ +class OperandInfo +{ +public: + /** + * @brief Construct a new OperandInfo object (deleted) + */ + OperandInfo() = delete; + /** + * @brief Construct a new OperandInfo object + * @param[in] shape Tensor shape + * @param[in] typeInfo Tensor data type + */ + OperandInfo(const Shape &shape, const TypeInfo &typeInfo) : _shape(shape), _typeInfo(typeInfo) + { + // DO NOTHING + } + /** + * @brief Construct a new OperandInfo object + * @param[in] origin info for copy + */ + OperandInfo(const OperandInfo &origin) : _shape(origin.shape()), _typeInfo(origin.typeInfo()) + { + // DO NOTHING + } + +public: + /** + * @brief Return tensor shape + * @return Tensor shape + */ + const Shape &shape() const { return _shape; } + /** + * @brief Return tensor data type info + * @return Tensor data type + */ + const TypeInfo &typeInfo() const { return _typeInfo; } + /** + * @brief Set tensor data type + */ + void type(const DataType type) { _typeInfo.type(type); } + /** + * @brief Return size of tensor (bytes) + * @return Tensor size + */ + size_t total_size() const { return _shape.num_elements() * sizeOfDataType(_typeInfo.type()); } + +private: + Shape _shape; + TypeInfo _typeInfo; +}; + +} // namespace ir +} // namespace neurun + 
+#endif // __NEURUN_IR_OPERAND_INFO_H__
diff --git a/runtime/neurun/core/include/ir/Operands.h b/runtime/neurun/core/include/ir/Operands.h
new file mode 100644
index 000000000..c8d68c088
--- /dev/null
+++ b/runtime/neurun/core/include/ir/Operands.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_IR_OPERANDS_H__
+#define __NEURUN_IR_OPERANDS_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "ir/Operand.h"
+#include "ir/Index.h"
+#include "util/ObjectManager.h"
+
+namespace neurun
+{
+namespace ir
+{
+
+class Operands : public util::ObjectManager<OperandIndex, Operand>
+{
+};
+
+} // namespace ir
+} // namespace neurun
+
+#endif // __NEURUN_IR_OPERANDS_H__
diff --git a/runtime/neurun/core/include/ir/Operation.h b/runtime/neurun/core/include/ir/Operation.h
new file mode 100644
index 000000000..a02f980a5
--- /dev/null
+++ b/runtime/neurun/core/include/ir/Operation.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERATION_H__ +#define __NEURUN_IR_OPERATION_H__ + +#include <memory> + +#include "ir/OpCode.h" +#include "ir/Operand.h" +#include "ir/OperandIndexSequence.h" +#include "ir/OperandConstraint.h" + +namespace neurun +{ +namespace ir +{ + +struct OperationVisitor; + +class Operation +{ +public: + Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs); + explicit Operation(OperandConstraint input_constr); + + Operation(const Operation &) = delete; + Operation(Operation &&) = default; + Operation &operator=(const Operation &) = delete; + Operation &operator=(Operation &&) = default; + + virtual ~Operation(); + +public: + virtual void accept(OperationVisitor &v) const = 0; + virtual std::string name() const { return std::string{toString(opcode())}; } + virtual OpCode opcode() const = 0; + +public: + void replaceInput(const OperandIndex &from, const OperandIndex &to); + void replaceOutput(const OperandIndex &from, const OperandIndex &to); + const OperandIndexSequence &getInputs() const { return _inputs; } + const OperandIndexSequence &getOutputs() const { return _outputs; } + // It's for only input/output tensors but const data. 
+ void setInputs(const OperandIndexSequence &indexes); + void setOutputs(const OperandIndexSequence &indexes); + +private: + OperandConstraint _input_constr; + OperandIndexSequence _inputs; + OperandIndexSequence _outputs; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_H__ diff --git a/runtime/neurun/core/include/ir/OperationIndexList.h b/runtime/neurun/core/include/ir/OperationIndexList.h new file mode 100644 index 000000000..bf3ea3d6b --- /dev/null +++ b/runtime/neurun/core/include/ir/OperationIndexList.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_MODEL_OPERATION_INDEX_LIST_H__ +#define __NEURUN_MODEL_OPERATION_INDEX_LIST_H__ + +#include <algorithm> +#include <cassert> +#include <initializer_list> +#include <list> + +#include "ir/Index.h" + +namespace neurun +{ +namespace ir +{ + +class OperationIndexList +{ +public: + OperationIndexList(void) = default; + OperationIndexList(std::initializer_list<OperationIndex> list); + +public: + void append(const OperationIndex &index) { _list.push_back(index); } + void remove(const OperationIndex &index) + { + auto itr = std::find(_list.begin(), _list.end(), index); + assert(itr != _list.end()); + _list.erase(itr); + } + +public: + uint32_t size() const { return static_cast<uint32_t>(_list.size()); } + const std::list<OperationIndex> &list() const { return _list; } + bool contains(const OperationIndex &index) const; + +private: + std::list<OperationIndex> _list; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_MODEL_OPERATION_INDEX_LIST_H__ diff --git a/runtime/neurun/core/include/ir/OperationIndexMap.h b/runtime/neurun/core/include/ir/OperationIndexMap.h new file mode 100644 index 000000000..50b1db527 --- /dev/null +++ b/runtime/neurun/core/include/ir/OperationIndexMap.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_INDEX_MAP_H__ +#define __NEURUN_IR_OPERATION_INDEX_MAP_H__ + +#include <unordered_map> + +#include "ir/Index.h" + +namespace neurun +{ +namespace ir +{ + +template <typename T> using OperationIndexMap = std::unordered_map<OperationIndex, T>; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_INDEX_MAP_H__ diff --git a/runtime/neurun/core/include/ir/OperationVisitor.h b/runtime/neurun/core/include/ir/OperationVisitor.h new file mode 100644 index 000000000..0eb6de2d3 --- /dev/null +++ b/runtime/neurun/core/include/ir/OperationVisitor.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERATION_VISITOR_H__ +#define __NEURUN_IR_OPERATION_VISITOR_H__ + +#include "ir/Operations.Include.h" +#include "ir/OpSequence.h" + +namespace neurun +{ +namespace ir +{ + +struct OperationVisitor +{ + virtual ~OperationVisitor() = default; + +#define OP(InternalName) \ + virtual void visit(const operation::InternalName &) {} +#include "ir/Operations.lst" +#undef OP + + // This OpSequence node should be handled specially so that + // Op.lst doesn't have OpSequence + // TODO Remove by pushing it down to derived classes. 
+ virtual void visit(const OpSequence &op_seq) + { + for (const auto &e : op_seq.operations()) + { + e.node->accept(*this); + } + } +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_VISITOR_H__ diff --git a/runtime/neurun/core/include/ir/Operations.Include.h b/runtime/neurun/core/include/ir/Operations.Include.h new file mode 100644 index 000000000..e14e18cc1 --- /dev/null +++ b/runtime/neurun/core/include/ir/Operations.Include.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// This file has no ifdef guard intentionally + +#include "ir/operation/BatchToSpaceND.h" +#include "ir/operation/Conv2D.h" +#include "ir/operation/MaxPool2D.h" +#include "ir/operation/AvgPool2D.h" +#include "ir/operation/Concat.h" +#include "ir/operation/Reshape.h" +#include "ir/operation/FullyConnected.h" +#include "ir/operation/Softmax.h" +#include "ir/operation/Transpose.h" +#include "ir/operation/Permute.h" +#include "ir/operation/ReduceSum.h" +#include "ir/operation/Add.h" +#include "ir/operation/Sub.h" +#include "ir/operation/DepthwiseConv2D.h" +#include "ir/operation/Slice.h" +#include "ir/operation/StridedSlice.h" +#include "ir/operation/Mul.h" +#include "ir/operation/Squeeze.h" +#include "ir/operation/Tanh.h" +#include "ir/operation/Logistic.h" +#include "ir/operation/Cast.h" +#include "ir/operation/Div.h" +#include "ir/operation/Exp.h" +#include "ir/operation/ReduceMax.h" +#include "ir/operation/Comparison.h" +#include "ir/operation/LogicalAnd.h" +#include "ir/operation/LogicalOr.h" +#include "ir/operation/LogicalNot.h" +#include "ir/operation/LSTM.h" +#include "ir/operation/RSQRT.h" +#include "ir/operation/ReLU.h" +#include "ir/operation/ResizeBilinear.h" +#include "ir/operation/ReLU1.h" +#include "ir/operation/ReLU6.h" +#include "ir/operation/RNN.h" +#include "ir/operation/Floor.h" +#include "ir/operation/SpaceToBatchND.h" +#include "ir/operation/SpaceToDepth.h" +#include "ir/operation/L2Pool2D.h" +#include "ir/operation/EmbeddingLookup.h" +#include "ir/operation/L2Normalization.h" +#include "ir/operation/HashtableLookup.h" +#include "ir/operation/InstanceNorm.h" +#include "ir/operation/PReLU.h" +#include "ir/operation/TransposeConv.h" +#include "ir/operation/SQRT.h" +#include "ir/operation/SquaredDifference.h" +#include "ir/operation/TopKV2.h" +#include "ir/operation/Gather.h" +#include "ir/operation/Neg.h" +#include "ir/operation/Abs.h" +#include "ir/operation/ArgMax.h" +#include "ir/operation/Dequantize.h" +#include "ir/operation/Mean.h" 
+#include "ir/operation/LocalResponseNormalization.h" +#include "ir/operation/DepthToSpace.h" +#include "ir/operation/Pack.h" +#include "ir/operation/ReduceMin.h" +#include "ir/operation/Split.h" +#include "ir/operation/Unpack.h" +#include "ir/operation/Pad.h" +#include "ir/operation/Min.h" +#include "ir/operation/Max.h" +#include "ir/operation/Custom.h" +#include "ir/operation/OneHot.h" diff --git a/runtime/neurun/core/include/ir/Operations.h b/runtime/neurun/core/include/ir/Operations.h new file mode 100644 index 000000000..9e2aecb43 --- /dev/null +++ b/runtime/neurun/core/include/ir/Operations.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERATIONS_H__ +#define __NEURUN_IR_OPERATIONS_H__ + +#include "ir/Index.h" +#include "ir/Operation.h" +#include "util/ObjectManager.h" + +namespace neurun +{ +namespace ir +{ + +class Operations : public util::ObjectManager<OperationIndex, Operation> +{ +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_MODEL_OPERATION_MANAGER_H__ diff --git a/runtime/neurun/core/include/ir/Operations.lst b/runtime/neurun/core/include/ir/Operations.lst new file mode 100644 index 000000000..8c02857d9 --- /dev/null +++ b/runtime/neurun/core/include/ir/Operations.lst @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef OP +#error Define OP before including this file +#endif + +// Internal Name +OP(Add) +OP(Sub) +OP(BatchToSpaceND) +OP(Cast) +OP(Conv2D) +OP(DepthwiseConv2D) +OP(AvgPool2D) +OP(MaxPool2D) +OP(Concat) +OP(FullyConnected) +OP(ReduceSum) +OP(Reshape) +OP(Mul) +OP(Softmax) +OP(Squeeze) +OP(Slice) +OP(StridedSlice) +OP(Tanh) +OP(Logistic) +OP(Div) +OP(Transpose) +OP(Exp) +OP(ReduceMax) +OP(Comparison) +OP(LogicalAnd) +OP(LogicalOr) +OP(LogicalNot) +OP(LSTM) +OP(RSQRT) +OP(ReLU) +OP(ResizeBilinear) +OP(ReLU1) +OP(ReLU6) +OP(RNN) +OP(Floor) +OP(SpaceToBatchND) +OP(SpaceToDepth) +OP(L2Pool2D) +OP(EmbeddingLookup) +OP(L2Normalization) +OP(HashtableLookup) +OP(InstanceNorm) +OP(PReLU) +OP(TransposeConv) +OP(SQRT) +OP(SquaredDifference) +OP(TopKV2) +OP(Gather) +OP(Neg) +OP(Abs) +OP(ArgMax) +OP(Dequantize) +OP(Mean) +OP(LocalResponseNormalization) +OP(DepthToSpace) +OP(Pack) +OP(ReduceMin) +OP(Split) +OP(Unpack) +OP(Pad) +OP(Custom) +OP(Permute) +OP(Min) +OP(Max) +OP(OneHot) diff --git a/runtime/neurun/core/include/ir/Shape.h b/runtime/neurun/core/include/ir/Shape.h new file mode 100644 index 000000000..a58af38ad --- /dev/null +++ b/runtime/neurun/core/include/ir/Shape.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_SHAPE_H__ +#define __NEURUN_IR_SHAPE_H__ + +#include "ir/Layout.h" +#include "misc/feature/Shape.h" + +#include <cstdint> +#include <vector> + +namespace neurun +{ +namespace ir +{ + +// TODO Remove this dependency. +using FeatureShape = nnfw::misc::feature::Shape; + +struct Shape +{ +public: + Shape() = default; + + explicit Shape(int rank) : _dimensions(rank) {} + + Shape(std::initializer_list<int32_t> dimensions) : _dimensions(dimensions) {} + + int rank() const { return _dimensions.size(); } + + const std::vector<int32_t> &dims() const { return _dimensions; } + + int32_t dim(int i) const { return _dimensions.at(i); } + + int32_t &dim(int i) { return _dimensions.at(i); } + + uint64_t num_elements() const; + +public: + FeatureShape asFeature(Layout layout) const; + + /** + * @brief Add dimension to the beginning + * @param[in] d dimension to add to the beginning + */ + void prepend(int32_t d) { _dimensions.insert(_dimensions.cbegin(), d); } + + /** + * @brief Add dimension to the end + * @param[in] d dimension to add to the end + */ + void append(int32_t d) { _dimensions.emplace_back(d); } + + /** + * @brief Extend rank of Shape object for operand with param. 
+ * @param[in] to_rank The rank value to be extended to + */ + void extendRank(int to_rank); + +private: + std::vector<int32_t> _dimensions; +}; + +inline bool operator==(const Shape &lhs, const Shape &rhs) { return lhs.dims() == rhs.dims(); } +inline bool operator!=(const Shape &lhs, const Shape &rhs) { return lhs.dims() != rhs.dims(); } + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_SHAPE_H__ diff --git a/runtime/neurun/core/include/ir/Subgraphs.h b/runtime/neurun/core/include/ir/Subgraphs.h new file mode 100644 index 000000000..716f09bcf --- /dev/null +++ b/runtime/neurun/core/include/ir/Subgraphs.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_SUBGRAPHS_H__ +#define __NEURUN_IR_SUBGRAPHS_H__ + +#include "ir/Index.h" +#include "ir/OpSequence.h" +#include "util/ObjectManager.h" + +namespace neurun +{ +namespace ir +{ + +/** + * @brief Class that manages OpSequence objects + */ +class Subgraphs : public util::ObjectManager<SubgraphIndex, OpSequence> +{ +public: + /** + * @brief Create an instance of OpSequence with given op and push it to objects + * + * @param[in] op_idx Operation index that is emplaced + * @param[in] op Operation that is emplaced + * @param[in] layout OpSequence's layout + * @return SubgraphIndex + */ + SubgraphIndex emplace(const OperationIndex &op_index, const Operation &op, Layout layout); + + /** + * @brief Push an instance of OpSequence to objects + * + * @param[in] subg An instance of OpSequence + * @return SubgraphIndex + */ + SubgraphIndex emplace(std::unique_ptr<OpSequence> &&subg); + + /** + * @brief Check if an operation does exist in any subgraphs + * + * @param operation_index Operation index to find + * @return true If such operation exists in any subgraphs otherwise false + */ + bool containsOperation(const OperationIndex &operation_index) const; + /** + * @brief Find an operation from all subgraphs + * + * @param operation_index Operation index to find + * @return SubgraphIndex Index of OpSequence that contains given operation index + */ + SubgraphIndex getOperation(const OperationIndex &operation_index) const; + /** + * @brief Dump subgraphs + * + * @param msg Message that will be displayed + */ + void dump(const std::string &msg) const; + /** + * @brief Remove an operation from OpSequence + * + * @param operation_index Operation index to be removed + */ + void removeFromSubgraph(const OperationIndex &operation_index); + +private: + SubgraphIndex findOperation(const OperationIndex &operation_index) const; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_SUBGRAPHS_H__ diff --git a/runtime/neurun/core/include/ir/TypeInfo.h 
b/runtime/neurun/core/include/ir/TypeInfo.h new file mode 100644 index 000000000..5b35046bb --- /dev/null +++ b/runtime/neurun/core/include/ir/TypeInfo.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_TYPEINFO_H__ +#define __NEURUN_IR_TYPEINFO_H__ + +#include <cstdint> + +#include "ir/DataType.h" + +namespace neurun +{ +namespace ir +{ + +class TypeInfo +{ +public: + TypeInfo() = delete; + + explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0) + : _type(type), _scale(scale), _offset(offset) + { + } + +public: + DataType type() const { return _type; } + float scale() const { return _scale; } + int32_t offset() const { return _offset; } + +public: + void type(const DataType type) { _type = type; } + +private: + DataType _type; + float _scale; + int32_t _offset; +}; + +bool operator==(const TypeInfo &lhs, const TypeInfo &rhs); +bool operator!=(const TypeInfo &lhs, const TypeInfo &rhs); + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_TYPEINFO_H__ diff --git a/runtime/neurun/core/include/ir/operand/LowerInfo.h b/runtime/neurun/core/include/ir/operand/LowerInfo.h new file mode 100644 index 000000000..e0fb2c5c8 --- /dev/null +++ b/runtime/neurun/core/include/ir/operand/LowerInfo.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERAND_LOWER_INFO_H__ +#define __NEURUN_IR_OPERAND_LOWER_INFO_H__ + +#include <functional> +#include <stdint.h> + +#include "ir/operand/PermuteFactor.h" +#include "util/Set.h" + +namespace neurun +{ +namespace backend +{ +class Backend; +} // namespace backend +} // namespace neurun + +namespace neurun +{ +namespace ir +{ +namespace operand +{ +using PermuteFactorSet = util::Set<PermuteFactor>; + +class LowerInfo +{ +public: + class Shape4D + { + public: + Shape4D(uint32_t n, uint32_t h, uint32_t w, uint32_t c) : _n{n}, _h{h}, _w{w}, _c{c} + { + // DO NOTHING + } + + public: + uint32_t n(void) const { return _n; } + uint32_t h(void) const { return _h; } + uint32_t w(void) const { return _w; } + uint32_t c(void) const { return _c; } + + private: + uint32_t _n; + uint32_t _h; + uint32_t _w; + uint32_t _c; + }; + +public: + LowerInfo(const Shape4D &shape) : _shape{shape} + { + // DO NOTHING + } + +public: + const Shape4D &shape(void) const { return _shape; } + const PermuteFactorSet &def_factors(void) const { return _def_factors; } + const PermuteFactorSet &use_factors(void) const { return _use_factors; } + +public: + void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); } + void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); } + void removeDefPermuteFactor(const PermuteFactor &factor) { 
_def_factors.remove(factor); } + void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); } + +private: + Shape4D _shape; + PermuteFactorSet _def_factors; + PermuteFactorSet _use_factors; +}; + +} // namespace operand +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERAND_LOWER_INFO_H__ diff --git a/runtime/neurun/core/include/ir/operand/ParentInfo.h b/runtime/neurun/core/include/ir/operand/ParentInfo.h new file mode 100644 index 000000000..92dac2b63 --- /dev/null +++ b/runtime/neurun/core/include/ir/operand/ParentInfo.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

/**
 * @file ParentInfo.h
 * @brief This file contains ParentInfo class and internal Coordinate4D class
 * to represent subsumption between operand
 */

#ifndef __NEURUN_IR_OPERAND_PARENT_INFO_H__
#define __NEURUN_IR_OPERAND_PARENT_INFO_H__

#include <stdint.h>

#include "ir/Index.h"
#include "util/Coordinates.h"

namespace neurun
{
namespace ir
{
namespace operand
{

/**
 * @brief Class to represent parent operand in child operand
 */
class ParentInfo
{
public:
  /**
   * @brief Construct a new ParentInfo object
   * @param[in] parent Index of parent operand
   * @param[in] coordinate Offset of child operand in parent operand
   */
  ParentInfo(const OperandIndex parent, const util::Coordinates &coordinate)
      : _parent{parent}, _coordinate{coordinate}
  {
    // DO NOTHING
  }

public:
  /**
   * @brief Return parent index
   * @return Parent index
   */
  OperandIndex parent(void) const { return _parent; }
  /**
   * @brief Return offset in parent
   * @return Offset
   */
  util::Coordinates offset(void) const { return _coordinate; }

private:
  OperandIndex _parent;
  util::Coordinates _coordinate;
};

} // namespace operand
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERAND_PARENT_INFO_H__
diff --git a/runtime/neurun/core/include/ir/operand/PermuteFactor.h b/runtime/neurun/core/include/ir/operand/PermuteFactor.h new file mode 100644 index 000000000..60d926b2d --- /dev/null +++ b/runtime/neurun/core/include/ir/operand/PermuteFactor.h @@ -0,0 +1,130 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file PermuteFactor.h + * @brief This file contains neurun::ir::operand::PermuteFactor class + * @ingroup COM_AI_RUNTIME + */ + +#ifndef __NEURUN_IR_OPERAND_PERMUTE_FACTOR_H__ +#define __NEURUN_IR_OPERAND_PERMUTE_FACTOR_H__ + +#include <functional> + +#include "ir/Layout.h" + +namespace neurun +{ +namespace backend +{ +class Backend; +} // namespace backend +} // namespace neurun + +namespace neurun +{ +namespace ir +{ +namespace operand +{ + +/** + * @brief Class that has factors of permutation + */ +class PermuteFactor +{ +public: + /** + * @brief Construct PermuteFactor object. + * @param backend The backend factor + * @param backend The layout factor + */ + PermuteFactor(const backend::Backend *backend, Layout layout) : _backend{backend}, _layout{layout} + { + // DO NOTHING + } + /** + * @brief Construct PermuteFactor object by copy semantics. + */ + PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout} + { + // DO NOTHING + } + /** + * @brief Construct PermuteFactor object by move semantics. 
+ */ + PermuteFactor(PermuteFactor &&) = default; + +public: + /** + * @brief Get backend + * + * @return Backend factor + */ + const backend::Backend *backend() const { return _backend; } + /** + * @brief Get layout + * + * @return Layout factor + */ + Layout layout() const { return _layout; } + +public: + /** + * @brief operator overloading function for `==` + * + * @return Whether two PermuteFactor are the same + */ + bool operator==(const PermuteFactor &other) const + { + return _backend == other.backend() && _layout == other.layout(); + } + /** + * @brief operator overloading function for `!=` + * + * @return Whether two PermuteFactor are differenct + */ + bool operator!=(const PermuteFactor &other) const { return !(*this == other); } + +private: + const backend::Backend *_backend{nullptr}; + Layout _layout{Layout::UNKNOWN}; +}; + +} // namespace operand +} // namespace ir +} // namespace neurun + +namespace std +{ + +/** + * @brief Structure that provides hash value of PermuteFactor + */ +template <> struct hash<neurun::ir::operand::PermuteFactor> +{ + size_t operator()(const neurun::ir::operand::PermuteFactor &factor) const noexcept + { + hash<const neurun::backend::Backend *> b_hash{}; + hash<neurun::ir::Layout> l_hash{}; + return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1); + } +}; + +} // namespace std + +#endif // __NEURUN_IR_OPERAND_PERMUTE_FACTOR_H__ diff --git a/runtime/neurun/core/include/ir/operation/Abs.h b/runtime/neurun/core/include/ir/operation/Abs.h new file mode 100644 index 000000000..97293823b --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Abs.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_ABS_H__
#define __NEURUN_IR_OPERATION_ABS_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the Abs operation: one input operand, one output operand
 */
class Abs : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0
  };

public:
  /**
   * @brief Construct from the input/output operand indices
   */
  Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Abs; }
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_ABS_H__
diff --git a/runtime/neurun/core/include/ir/operation/Add.h b/runtime/neurun/core/include/ir/operation/Add.h new file mode 100644 index 000000000..fc4d6a7e7 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Add.h @@ -0,0 +1,62 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_ADD_H__
#define __NEURUN_IR_OPERATION_ADD_H__

#include "ir/Operation.h"
#include "ir/InternalType.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the Add operation: two inputs (LHS, RHS), one output,
 *        with a fused activation parameter
 */
class Add : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    LHS = 0,
    RHS
  };

  struct Param
  {
    Activation activation; // fused activation applied to the result
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Add; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_ADD_H__
diff --git a/runtime/neurun/core/include/ir/operation/ArgMax.h b/runtime/neurun/core/include/ir/operation/ArgMax.h new file mode 100644 index 000000000..23f52710f --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/ArgMax.h @@ -0,0 +1,62 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_ARG_MAX_H__
#define __NEURUN_IR_OPERATION_ARG_MAX_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the ArgMax operation: one input, one output, with
 *        axis/rank parameters
 */
class ArgMax : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT
  };

  struct Param
  {
    int axis; // axis along which the arg-max is taken
    int rank; // rank of the input tensor
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
         const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::ArgMax; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_ARG_MAX_H__
diff --git a/runtime/neurun/core/include/ir/operation/AvgPool2D.h b/runtime/neurun/core/include/ir/operation/AvgPool2D.h new file mode 100644 index 000000000..a03628184 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/AvgPool2D.h @@ -0,0 +1,69 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_AVGPOOL2D_H__
#define __NEURUN_IR_OPERATION_AVGPOOL2D_H__

#include <memory>

#include "ir/Operation.h"
#include "ir/InternalType.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the AvgPool2D operation: one input, one output, with
 *        kernel/stride/padding/activation parameters
 */
class AvgPool2D : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0
  };

  struct Param
  {
    uint32_t kh; // pooling window height
    uint32_t kw; // pooling window width

    Stride stride;
    Padding padding;
    Activation activation; // fused activation applied to the result
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
            const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::AvgPool2D; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_AVGPOOL2D_H__
diff --git a/runtime/neurun/core/include/ir/operation/BatchToSpaceND.h b/runtime/neurun/core/include/ir/operation/BatchToSpaceND.h new file mode 100644 index 000000000..b90d2871d --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/BatchToSpaceND.h @@ -0,0 +1,50 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_BATCH_TO_SPACE_ND_H__
#define __NEURUN_IR_OPERATION_BATCH_TO_SPACE_ND_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the BatchToSpaceND operation: two inputs
 *        (INPUT, BLOCK_SIZE), one output
 */
class BatchToSpaceND : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0,
    BLOCK_SIZE = 1
  };

public:
  /**
   * @brief Construct from the input/output operand indices
   */
  BatchToSpaceND(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::BatchToSpaceND; }
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_BATCH_TO_SPACE_ND_H__
diff --git a/runtime/neurun/core/include/ir/operation/Cast.h b/runtime/neurun/core/include/ir/operation/Cast.h new file mode 100644 index 000000000..a71087dd0 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Cast.h @@ -0,0 +1,49 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_CAST_H__
#define __NEURUN_IR_OPERATION_CAST_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the Cast operation: one input operand, one output operand
 */
class Cast : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0
  };

public:
  /**
   * @brief Construct from the input/output operand indices
   */
  Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Cast; }
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_CAST_H__
diff --git a/runtime/neurun/core/include/ir/operation/Comparison.h b/runtime/neurun/core/include/ir/operation/Comparison.h new file mode 100644 index 000000000..23c775c42 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Comparison.h @@ -0,0 +1,72 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_COMPARISON_H__
#define __NEURUN_IR_OPERATION_COMPARISON_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for element comparison: two inputs (INPUT0, INPUT1), one
 *        output, with the comparison kind as parameter
 */
class Comparison : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT0 = 0,
    INPUT1
  };

  // Which comparison this node performs
  enum class ComparisonType
  {
    Equal,
    NotEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual
  };

  struct Param
  {
    ComparisonType comparison_type;
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
             const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Comparison; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_COMPARISON_H__
diff --git a/runtime/neurun/core/include/ir/operation/Concat.h b/runtime/neurun/core/include/ir/operation/Concat.h new file mode 100644 index 000000000..8628ed398 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Concat.h @@ -0,0 +1,59 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_CONCAT_H__
#define __NEURUN_IR_OPERATION_CONCAT_H__

#include <memory>

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the Concat operation: a variable number of inputs
 *        concatenated along an axis (no fixed Input enum)
 */
class Concat : public Operation
{
public:
  struct Param
  {
    int32_t axis; // axis along which inputs are concatenated
    int32_t rank; // rank of the input tensors
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
         const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Concat; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_CONCAT_H__
diff --git a/runtime/neurun/core/include/ir/operation/Conv2D.h b/runtime/neurun/core/include/ir/operation/Conv2D.h new file mode 100644 index 000000000..1215666e9 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Conv2D.h @@ -0,0 +1,68 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_CONV2D_H__
#define __NEURUN_IR_OPERATION_CONV2D_H__

#include <memory>

#include "ir/Operation.h"
#include "ir/InternalType.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the Conv2D operation: three inputs (INPUT, KERNEL, BIAS),
 *        one output, with stride/padding/activation parameters
 */
class Conv2D : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0,
    KERNEL,
    BIAS
  };

  struct Param
  {
    Stride stride;
    Padding padding;
    Activation activation; // fused activation applied to the result
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
         const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Conv2D; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_CONV2D_H__
diff --git a/runtime/neurun/core/include/ir/operation/Custom.h b/runtime/neurun/core/include/ir/operation/Custom.h new file mode 100644 index 000000000..03501e8ef --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Custom.h @@ -0,0 +1,66 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __NEURUN_IR_OPERATION_CUSTOM_H__
#define __NEURUN_IR_OPERATION_CUSTOM_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for a user-defined (custom) operation, identified by a
 *        string id and carrying an opaque user data blob
 */
class Custom : public Operation
{
public:
  // Opaque blob of user-provided data attached to the operation.
  // NOTE(review): raw pointer + non-default ~Custom() suggests this class owns
  // the buffer, but the ownership contract is defined in the out-of-view .cc
  // file — confirm before copying/moving Custom objects.
  struct Userdata
  {
    char *data;
    size_t size;
  };

  /**
   * @brief Construct from an input-count constraint, operand indices, the
   *        operation id and the user data blob
   */
  Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
         const OperandIndexSequence &outputs, std::string id, const Userdata &userdata);

  void accept(OperationVisitor &v) const override;

public:
  /**
   * @return unique operation identifier
   */
  const std::string &id() const;

  std::string name() const override;
  OpCode opcode() const final { return OpCode::Custom; }

  /**
   * @return user-provided data
   */
  const Userdata &userdata() const;

  ~Custom() override;

private:
  std::string _id;
  Userdata _userdata;
};

} // namespace operation
} // namespace ir
} // namespace neurun
#endif // __NEURUN_IR_OPERATION_CUSTOM_H__
diff --git a/runtime/neurun/core/include/ir/operation/DepthToSpace.h b/runtime/neurun/core/include/ir/operation/DepthToSpace.h new file mode 100644 index 000000000..6e7aaf249 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/DepthToSpace.h @@ -0,0 +1,63 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_DEPTH_TO_SPACE_H__
#define __NEURUN_IR_OPERATION_DEPTH_TO_SPACE_H__

#include <memory>

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the DepthToSpace operation: one input, one output, with
 *        a block-size parameter
 */
class DepthToSpace : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0
  };

  struct Param
  {
    std::int32_t block_size; // size of the spatial block
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
               const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::DepthToSpace; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_DEPTH_TO_SPACE_H__
diff --git a/runtime/neurun/core/include/ir/operation/DepthwiseConv2D.h b/runtime/neurun/core/include/ir/operation/DepthwiseConv2D.h new file mode 100644 index 000000000..1f0926fa8 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/DepthwiseConv2D.h @@ -0,0 +1,69 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_DEPTHWISECONV2D_H__
#define __NEURUN_IR_OPERATION_DEPTHWISECONV2D_H__

#include <memory>

#include "ir/Operation.h"
#include "ir/InternalType.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the DepthwiseConv2D operation: three inputs
 *        (INPUT, KERNEL, BIAS), one output, with stride/padding/multiplier/
 *        activation parameters
 */
class DepthwiseConv2D : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0,
    KERNEL,
    BIAS
  };

  struct Param
  {
    Stride stride;
    Padding padding;
    uint32_t multiplier;   // depth (channel) multiplier
    Activation activation; // fused activation applied to the result
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  DepthwiseConv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
                  const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::DepthwiseConv2D; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_DEPTHWISECONV2D_H__
diff --git a/runtime/neurun/core/include/ir/operation/Dequantize.h b/runtime/neurun/core/include/ir/operation/Dequantize.h new file mode 100644 index 000000000..dfca278cd --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Dequantize.h @@ -0,0 +1,49 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_DEQUANTIZE_H__
#define __NEURUN_IR_OPERATION_DEQUANTIZE_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the Dequantize operation: one input operand, one output
 *        operand
 */
class Dequantize : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0
  };

public:
  /**
   * @brief Construct from the input/output operand indices
   */
  Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Dequantize; }
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_DEQUANTIZE_H__
diff --git a/runtime/neurun/core/include/ir/operation/Div.h b/runtime/neurun/core/include/ir/operation/Div.h new file mode 100644 index 000000000..d3e744472 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Div.h @@ -0,0 +1,62 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_DIV_H__
#define __NEURUN_IR_OPERATION_DIV_H__

#include "ir/Operation.h"
#include "ir/InternalType.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the Div operation: two inputs (LHS, RHS), one output,
 *        with a fused activation parameter
 */
class Div : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    LHS = 0,
    RHS
  };

  struct Param
  {
    Activation activation; // fused activation applied to the result
  };

public:
  /**
   * @brief Construct from operand indices and parameters
   */
  Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Div; }

public:
  const Param &param() const { return _param; }

private:
  Param _param;
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_DIV_H__
diff --git a/runtime/neurun/core/include/ir/operation/EmbeddingLookup.h b/runtime/neurun/core/include/ir/operation/EmbeddingLookup.h new file mode 100644 index 000000000..968b7b35a --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/EmbeddingLookup.h @@ -0,0 +1,50 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_EMBEDDING_LOOKUP_H__
#define __NEURUN_IR_OPERATION_EMBEDDING_LOOKUP_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the EmbeddingLookup operation: two inputs
 *        (LOOKUPS, VALUES), one output
 */
class EmbeddingLookup : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    LOOKUPS = 0,
    VALUES = 1
  };

public:
  /**
   * @brief Construct from the input/output operand indices
   */
  EmbeddingLookup(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::EmbeddingLookup; }
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_EMBEDDING_LOOKUP_H__
diff --git a/runtime/neurun/core/include/ir/operation/Exp.h b/runtime/neurun/core/include/ir/operation/Exp.h new file mode 100644 index 000000000..8e04f3f7f --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Exp.h @@ -0,0 +1,49 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_IR_OPERATION_EXP_H__
#define __NEURUN_IR_OPERATION_EXP_H__

#include "ir/Operation.h"

namespace neurun
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node for the Exp operation: one input operand, one output operand
 */
class Exp : public Operation
{
public:
  // Position of each operand in the input sequence
  enum Input
  {
    INPUT = 0
  };

public:
  /**
   * @brief Construct from the input/output operand indices
   */
  Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);

public:
  void accept(OperationVisitor &v) const override;
  OpCode opcode() const final { return OpCode::Exp; }
};

} // namespace operation
} // namespace ir
} // namespace neurun

#endif // __NEURUN_IR_OPERATION_EXP_H__
diff --git a/runtime/neurun/core/include/ir/operation/Floor.h b/runtime/neurun/core/include/ir/operation/Floor.h new file mode 100644 index 000000000..ca4cf9881 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Floor.h @@ -0,0 +1,51 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#ifndef __NEURUN_IR_OPERATION_FLOOR_H__ +#define __NEURUN_IR_OPERATION_FLOOR_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Floor : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Floor; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_FLOOR_H__ diff --git a/runtime/neurun/core/include/ir/operation/FullyConnected.h b/runtime/neurun/core/include/ir/operation/FullyConnected.h new file mode 100644 index 000000000..1ffa1318d --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/FullyConnected.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_FULLYCONNECTED_H__ +#define __NEURUN_IR_OPERATION_FULLYCONNECTED_H__ + +#include <memory> + +#include "ir/Operation.h" +#include "ir/InternalType.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class FullyConnected : public Operation +{ +public: + enum Input + { + INPUT = 0, + WEIGHT, + BIAS + }; + + struct Param + { + Activation activation; + }; + +public: + FullyConnected(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::FullyConnected; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_FULLYCONNECTED_H__ diff --git a/runtime/neurun/core/include/ir/operation/Gather.h b/runtime/neurun/core/include/ir/operation/Gather.h new file mode 100644 index 000000000..13540d413 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Gather.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_GATHER_H__ +#define __NEURUN_IR_OPERATION_GATHER_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Gather : public Operation +{ +public: + enum Input + { + INPUT = 0, + INDICES, + }; + + struct Param + { + int32_t axis; + int32_t rank; + }; + +public: + Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Gather; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_GATHER_H__ diff --git a/runtime/neurun/core/include/ir/operation/HashtableLookup.h b/runtime/neurun/core/include/ir/operation/HashtableLookup.h new file mode 100644 index 000000000..fb6c97607 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/HashtableLookup.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_HASHTABLE_LOOKUP_H__ +#define __NEURUN_IR_OPERATION_HASHTABLE_LOOKUP_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class HashtableLookup : public Operation +{ +public: + enum Input + { + LOOKUPS = 0, + KEYS = 1, + VALUES = 2 + }; + + enum Output + { + OUTPUT = 0, + HITS = 1 + }; + +public: + HashtableLookup(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::HashtableLookup; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_HASHTABLE_LOOKUP_H__ diff --git a/runtime/neurun/core/include/ir/operation/InstanceNorm.h b/runtime/neurun/core/include/ir/operation/InstanceNorm.h new file mode 100644 index 000000000..cbd03ad1f --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/InstanceNorm.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_INSTANCE_NORM_H__ +#define __NEURUN_IR_OPERATION_INSTANCE_NORM_H__ + +#include "ir/Operation.h" +#include "ir/InternalType.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class InstanceNorm : public Operation +{ +public: + enum Input + { + INPUT = 0, + GAMMA, + BETA + }; + + struct Param + { + Activation activation; + float epsilon; + }; + +public: + InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::InstanceNorm; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_INSTANCE_NORM_H__ diff --git a/runtime/neurun/core/include/ir/operation/L2Normalization.h b/runtime/neurun/core/include/ir/operation/L2Normalization.h new file mode 100644 index 000000000..e2c1f4eee --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/L2Normalization.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_L2_NORMALIZATION_H__ +#define __NEURUN_IR_OPERATION_L2_NORMALIZATION_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class L2Normalization : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + struct Param + { + int32_t rank; + }; + +public: + L2Normalization(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::L2Normalization; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_L2_NORMALIZATION_H__ diff --git a/runtime/neurun/core/include/ir/operation/L2Pool2D.h b/runtime/neurun/core/include/ir/operation/L2Pool2D.h new file mode 100644 index 000000000..6d4d72ee2 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/L2Pool2D.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_L2_POOL_2D_H__ +#define __NEURUN_IR_OPERATION_L2_POOL_2D_H__ + +#include <memory> + +#include "ir/Operation.h" +#include "ir/InternalType.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class L2Pool2D : public Operation +{ +public: + enum Input + { + INPUT = 0, + }; + + struct Param + { + Padding padding; + Stride stride; + uint32_t kw; + uint32_t kh; + Activation activation; + }; + +public: + L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::L2Pool2D; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_L2_POOL_2D_H__ diff --git a/runtime/neurun/core/include/ir/operation/LSTM.h b/runtime/neurun/core/include/ir/operation/LSTM.h new file mode 100644 index 000000000..2ea09b1b7 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/LSTM.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __NEURUN_IR_OPERATION_LSTM_H__ +#define __NEURUN_IR_OPERATION_LSTM_H__ + +#include "ir/InternalType.h" +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class LSTM : public Operation +{ +public: + enum Input + { + INPUT = 0, + INPUT_TO_INPUT_WEIGHTS = 1, + INPUT_TO_FORGET_WEIGHTS = 2, + INPUT_TO_CELL_WEIGHTS = 3, + INPUT_TO_OUTPUT_WEIGHTS = 4, + RECURRENT_TO_INPUT_WEIGHTS = 5, + RECURRENT_TO_FORGET_WEIGHTS = 6, + RECURRENT_TO_CELL_WEIGHTS = 7, + RECURRENT_TO_OUTPUT_WEIGHTS = 8, + CELL_TO_INPUT_WEIGHTS = 9, + CELL_TO_FORGET_WEIGHTS = 10, + CELL_TO_OUTPUT_WEIGHTS = 11, + INPUT_GATE_BIAS = 12, + FORGET_GATE_BIAS = 13, + CELL_BIAS = 14, + OUTPUT_GATE_BIAS = 15, + PROJECTION_WEIGHTS = 16, + PROJECTION_BIAS = 17, + OUTPUT_STATE_IN = 18, + CELL_STATE_IN = 19, + }; + + enum Output + { + SCRATCH_BUFFER = 0, + OUTPUT_STATE_OUT = 1, + CELL_STATE_OUT = 2, + OUTPUT = 3 + }; + + struct Param + { + Activation activation; + float cell_threshold; + float projection_threshold; + }; + +public: + LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::LSTM; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_LSTM_H__ diff --git a/runtime/neurun/core/include/ir/operation/LocalResponseNormalization.h b/runtime/neurun/core/include/ir/operation/LocalResponseNormalization.h new file mode 100644 index 000000000..3fbf2e4ae --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/LocalResponseNormalization.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__ +#define __NEURUN_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class LocalResponseNormalization : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + int radius; + float bias; + float alpha; + float beta; + }; + +public: + LocalResponseNormalization(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::LocalResponseNormalization; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__ diff --git a/runtime/neurun/core/include/ir/operation/LogicalAnd.h b/runtime/neurun/core/include/ir/operation/LogicalAnd.h new file mode 100644 index 000000000..f7b03d62d --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/LogicalAnd.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERATION_LOGICAL_AND_H__ +#define __NEURUN_IR_OPERATION_LOGICAL_AND_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class LogicalAnd : public Operation +{ +public: + enum Input + { + INPUT0 = 0, + INPUT1 = 1, + }; + +public: + LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::LogicalAnd; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_LOGICAL_AND_H__ diff --git a/runtime/neurun/core/include/ir/operation/LogicalNot.h b/runtime/neurun/core/include/ir/operation/LogicalNot.h new file mode 100644 index 000000000..e689d57b2 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/LogicalNot.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_LOGICAL_NOT_H__ +#define __NEURUN_IR_OPERATION_LOGICAL_NOT_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class LogicalNot : public Operation +{ +public: + enum Input + { + INPUT = 0, + }; + +public: + LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::LogicalNot; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_LOGICAL_NOT_H__ diff --git a/runtime/neurun/core/include/ir/operation/LogicalOr.h b/runtime/neurun/core/include/ir/operation/LogicalOr.h new file mode 100644 index 000000000..fda6b20d7 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/LogicalOr.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_LOGICAL_OR_H__ +#define __NEURUN_IR_OPERATION_LOGICAL_OR_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class LogicalOr : public Operation +{ +public: + enum Input + { + INPUT0 = 0, + INPUT1 = 1, + }; + +public: + LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::LogicalOr; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_LOGICAL_OR_H__ diff --git a/runtime/neurun/core/include/ir/operation/Logistic.h b/runtime/neurun/core/include/ir/operation/Logistic.h new file mode 100644 index 000000000..b23e7ef3f --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Logistic.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_LOGISTIC_H__ +#define __NEURUN_IR_OPERATION_LOGISTIC_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Logistic : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Logistic; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_LOGISTIC_H__ diff --git a/runtime/neurun/core/include/ir/operation/LowerInfo.h b/runtime/neurun/core/include/ir/operation/LowerInfo.h new file mode 100644 index 000000000..856976a0c --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/LowerInfo.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_LOWER_INFO_H__ +#define __NEURUN_IR_OPERATION_LOWER_INFO_H__ + +#include <string> + +#include <ir/operand/PermuteFactor.h> + +namespace neurun +{ +namespace backend +{ +class Backend; +} // namespace backend +} // namespace neurun + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class LowerInfo +{ +public: + LowerInfo(const backend::Backend *backend, Layout layout); + const backend::Backend *backend() const { return _permute_factor.backend(); } + Layout layout() const { return _permute_factor.layout(); } + +private: + operand::PermuteFactor _permute_factor; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_LOWER_INFO_H__ diff --git a/runtime/neurun/core/include/ir/operation/Max.h b/runtime/neurun/core/include/ir/operation/Max.h new file mode 100644 index 000000000..1675f9f72 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Max.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_MAX_H__ +#define __NEURUN_IR_OPERATION_MAX_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Max : public Operation +{ +public: + enum Input + { + LHS = 0, + RHS + }; + +public: + Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Max; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_MAX_H__ diff --git a/runtime/neurun/core/include/ir/operation/MaxPool2D.h b/runtime/neurun/core/include/ir/operation/MaxPool2D.h new file mode 100644 index 000000000..c0f0939aa --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/MaxPool2D.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_MAXPOOL2D_H__ +#define __NEURUN_IR_OPERATION_MAXPOOL2D_H__ + +#include <memory> + +#include "ir/Operation.h" +#include "ir/InternalType.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class MaxPool2D : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + uint32_t kh; + uint32_t kw; + Stride stride; + Padding padding; + Activation activation; + }; + +public: + MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::MaxPool2D; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_MAXPOOL2D_H__ diff --git a/runtime/neurun/core/include/ir/operation/Mean.h b/runtime/neurun/core/include/ir/operation/Mean.h new file mode 100644 index 000000000..cce8de377 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Mean.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_MEAN_H__ +#define __NEURUN_IR_OPERATION_MEAN_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Mean : public Operation +{ +public: + enum Input + { + INPUT + }; + + struct Param + { + std::vector<int> axes; + bool keep_dims; + int32_t rank; + }; + +public: + Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Mean; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_MEAN_H__ diff --git a/runtime/neurun/core/include/ir/operation/Min.h b/runtime/neurun/core/include/ir/operation/Min.h new file mode 100644 index 000000000..fac901f21 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Min.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_MIN_H__ +#define __NEURUN_IR_OPERATION_MIN_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Min : public Operation +{ +public: + enum Input + { + LHS = 0, + RHS + }; + +public: + Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Min; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_MIN_H__ diff --git a/runtime/neurun/core/include/ir/operation/Mul.h b/runtime/neurun/core/include/ir/operation/Mul.h new file mode 100644 index 000000000..f6cfe2afb --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Mul.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_MUL_H__ +#define __NEURUN_IR_OPERATION_MUL_H__ + +#include "ir/Operation.h" +#include "ir/InternalType.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Mul : public Operation +{ +public: + enum Input + { + LHS = 0, + RHS + }; + + struct Param + { + Activation activation; + }; + +public: + Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Mul; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_MUL_H__ diff --git a/runtime/neurun/core/include/ir/operation/Neg.h b/runtime/neurun/core/include/ir/operation/Neg.h new file mode 100644 index 000000000..ec364f8ad --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Neg.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_NEG_H__ +#define __NEURUN_IR_OPERATION_NEG_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Neg : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Neg; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_NEG_H__ diff --git a/runtime/neurun/core/include/ir/operation/OneHot.h b/runtime/neurun/core/include/ir/operation/OneHot.h new file mode 100644 index 000000000..5fbc5d45f --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/OneHot.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_ONEHOT_H__ +#define __NEURUN_IR_OPERATION_ONEHOT_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class OneHot : public Operation +{ +public: + enum Input + { + INDICES = 0, + DEPTH = 1, + ON_VALUE = 2, + OFF_VALUE = 3, + }; + + struct Param + { + int axis; + }; + +public: + OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::OneHot; } + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_ONEHOT_H__ diff --git a/runtime/neurun/core/include/ir/operation/PReLU.h b/runtime/neurun/core/include/ir/operation/PReLU.h new file mode 100644 index 000000000..8c00c46fd --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/PReLU.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_PRELU_H__ +#define __NEURUN_IR_OPERATION_PRELU_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class PReLU : public Operation +{ +public: + enum Input + { + INPUT = 0, + ALPHA = 1 + }; + +public: + PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::PReLU; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_PRELU_H__ diff --git a/runtime/neurun/core/include/ir/operation/Pack.h b/runtime/neurun/core/include/ir/operation/Pack.h new file mode 100644 index 000000000..ccf73fe5c --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Pack.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __NEURUN_IR_OPERATION_PACK_H__ +#define __NEURUN_IR_OPERATION_PACK_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ +class Pack : public Operation +{ +public: + struct Param + { + int32_t num; + int32_t axis; + int32_t rank; + }; + +public: + Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Pack; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; +} // namespace operation +} // namespace ir +} // namespace neurun +#endif // __NEURUN_IR_OPERATION_PACK_H__ diff --git a/runtime/neurun/core/include/ir/operation/Pad.h b/runtime/neurun/core/include/ir/operation/Pad.h new file mode 100644 index 000000000..8e8304fae --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Pad.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_PAD_H__ +#define __NEURUN_IR_OPERATION_PAD_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Pad : public Operation +{ +public: + enum Input + { + INPUT = 0, + PAD = 1, + // VALUE = 2 Not allow padding value operand yet + }; + +public: + struct Param + { + int32_t rank; + }; + +public: + Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Pad; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_PAD_H__ diff --git a/runtime/neurun/core/include/ir/operation/Permute.h b/runtime/neurun/core/include/ir/operation/Permute.h new file mode 100644 index 000000000..f91f9571b --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Permute.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_PERMUTE_H__ +#define __NEURUN_IR_OPERATION_PERMUTE_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace backend +{ +class BackendContext; +} // namespace backend +} // namespace neurun + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Permute : public Operation +{ +public: + enum class Type + { + NHWC_TO_NCHW, + NCHW_TO_NHWC, + COPY + }; + + struct Param + { + const backend::BackendContext *input_backend_ctx; + const backend::BackendContext *output_backend_ctx; + }; + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Permute; } + +public: + Permute(const OperandIndex &input, const OperandIndex &output, + const backend::BackendContext *input_backend_ctx, + const backend::BackendContext *output_backend_ctx, Type type, + DataType data_type = DataType::FLOAT32); + +public: + const Param &param() const { return _param; } + DataType getDataType() const { return _dataType; } + Type getPermuteType() const { return _type; } + +private: + Param _param; + Type _type; + DataType _dataType; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_PERMUTE_H__ diff --git a/runtime/neurun/core/include/ir/operation/RNN.h b/runtime/neurun/core/include/ir/operation/RNN.h new file mode 100644 index 000000000..d812a6fc3 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/RNN.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __NEURUN_IR_OPERATION_RNN_H__ +#define __NEURUN_IR_OPERATION_RNN_H__ + +#include "ir/InternalType.h" +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class RNN : public Operation +{ +public: + enum Input + { + INPUT = 0, + WEIGHTS = 1, + RECURRENT_WEIGHTS = 2, + BIAS = 3, + HIDDEN_STATE_IN = 4 + }; + + enum Output + { + OUTPUT = 0, + HIDDEN_STATE_OUT = 1 + }; + + struct Param + { + Activation activation; + }; + +public: + RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::RNN; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_RNN_H__ diff --git a/runtime/neurun/core/include/ir/operation/RSQRT.h b/runtime/neurun/core/include/ir/operation/RSQRT.h new file mode 100644 index 000000000..33648555a --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/RSQRT.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERATION_RSQRT_H__ +#define __NEURUN_IR_OPERATION_RSQRT_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class RSQRT : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::RSQRT; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_RSQRT_H__ diff --git a/runtime/neurun/core/include/ir/operation/ReLU.h b/runtime/neurun/core/include/ir/operation/ReLU.h new file mode 100644 index 000000000..b6c7fdf01 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/ReLU.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_RELU_H__ +#define __NEURUN_IR_OPERATION_RELU_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class ReLU : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::ReLU; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_RELU_H__ diff --git a/runtime/neurun/core/include/ir/operation/ReLU1.h b/runtime/neurun/core/include/ir/operation/ReLU1.h new file mode 100644 index 000000000..ac45fda05 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/ReLU1.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_ReLU1_H__ +#define __NEURUN_IR_OPERATION_ReLU1_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class ReLU1 : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::ReLU1; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_ReLU1_H__ diff --git a/runtime/neurun/core/include/ir/operation/ReLU6.h b/runtime/neurun/core/include/ir/operation/ReLU6.h new file mode 100644 index 000000000..4d98dad55 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/ReLU6.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_ReLU6_H__ +#define __NEURUN_IR_OPERATION_ReLU6_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class ReLU6 : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::ReLU6; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_ReLU6_H__ diff --git a/runtime/neurun/core/include/ir/operation/ReduceMax.h b/runtime/neurun/core/include/ir/operation/ReduceMax.h new file mode 100644 index 000000000..da4d7c4cc --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/ReduceMax.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_REDUCEMAX_H__ +#define __NEURUN_IR_OPERATION_REDUCEMAX_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class ReduceMax : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + std::vector<int> axes; + bool keep_dims; + int32_t rank; + }; + +public: + ReduceMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::ReduceMax; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_REDUCEMAX_H__ diff --git a/runtime/neurun/core/include/ir/operation/ReduceMin.h b/runtime/neurun/core/include/ir/operation/ReduceMin.h new file mode 100644 index 000000000..f79fdeaea --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/ReduceMin.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_REDUCEMIN_H__ +#define __NEURUN_IR_OPERATION_REDUCEMIN_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class ReduceMin : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + std::vector<int> axes; + bool keep_dims; + int32_t rank; + }; + +public: + ReduceMin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::ReduceMin; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_REDUCEMIN_H__ diff --git a/runtime/neurun/core/include/ir/operation/ReduceSum.h b/runtime/neurun/core/include/ir/operation/ReduceSum.h new file mode 100644 index 000000000..b5ab8ee75 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/ReduceSum.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_REDUCE_SUM_H__ +#define __NEURUN_IR_OPERATION_REDUCE_SUM_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class ReduceSum : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + std::vector<int> axes; + bool keep_dims; + int32_t rank; + }; + +public: + ReduceSum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::ReduceSum; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_REDUCE_SUM_H__ diff --git a/runtime/neurun/core/include/ir/operation/Reshape.h b/runtime/neurun/core/include/ir/operation/Reshape.h new file mode 100644 index 000000000..e476d7fe1 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Reshape.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_RESHAPE_H__ +#define __NEURUN_IR_OPERATION_RESHAPE_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Reshape : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Reshape; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_RESHAPE_H__ diff --git a/runtime/neurun/core/include/ir/operation/ResizeBilinear.h b/runtime/neurun/core/include/ir/operation/ResizeBilinear.h new file mode 100644 index 000000000..d937da00c --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/ResizeBilinear.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_RESIZE_BILINEAR_H__ +#define __NEURUN_IR_OPERATION_RESIZE_BILINEAR_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class ResizeBilinear : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + int32_t height_out; + int32_t width_out; + }; + +public: + ResizeBilinear(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::ResizeBilinear; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_RESIZE_BILINEAR_H__ diff --git a/runtime/neurun/core/include/ir/operation/SQRT.h b/runtime/neurun/core/include/ir/operation/SQRT.h new file mode 100644 index 000000000..5e21315b4 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/SQRT.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_SQRT_H__ +#define __NEURUN_IR_OPERATION_SQRT_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class SQRT : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + +public: + SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::SQRT; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_SQRT_H__ diff --git a/runtime/neurun/core/include/ir/operation/Slice.h b/runtime/neurun/core/include/ir/operation/Slice.h new file mode 100644 index 000000000..4b79f42a6 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Slice.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_SLICE_H__ +#define __NEURUN_IR_OPERATION_SLICE_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Slice : public Operation +{ +public: + enum Input + { + INPUT = 0, + BEGINS = 1, + SIZES = 2, + }; + +public: + struct Param + { + int32_t rank; + }; + +public: + Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Slice; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_SLICE_H__ diff --git a/runtime/neurun/core/include/ir/operation/Softmax.h b/runtime/neurun/core/include/ir/operation/Softmax.h new file mode 100644 index 000000000..a3e896fed --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Softmax.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_SOFTMAX_H__ +#define __NEURUN_IR_OPERATION_SOFTMAX_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Softmax : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + float beta; + }; + +public: + Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Softmax; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_SOFTMAX_H__ diff --git a/runtime/neurun/core/include/ir/operation/SpaceToBatchND.h b/runtime/neurun/core/include/ir/operation/SpaceToBatchND.h new file mode 100644 index 000000000..4ca0978b0 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/SpaceToBatchND.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_SPACE_TO_BATCH_ND_H__ +#define __NEURUN_IR_OPERATION_SPACE_TO_BATCH_ND_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class SpaceToBatchND : public Operation +{ +public: + enum Input + { + INPUT = 0, + BLOCK_SIZE = 1, + PADDINGS = 2 + }; + +public: + SpaceToBatchND(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::SpaceToBatchND; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_SPACE_TO_BATCH_ND_H__ diff --git a/runtime/neurun/core/include/ir/operation/SpaceToDepth.h b/runtime/neurun/core/include/ir/operation/SpaceToDepth.h new file mode 100644 index 000000000..9e77bdae0 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/SpaceToDepth.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_SPACE_TO_DEPTH_H__ +#define __NEURUN_IR_OPERATION_SPACE_TO_DEPTH_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class SpaceToDepth : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + std::int32_t block_size; + }; + +public: + SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::SpaceToDepth; } + +public: + const Param &param() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_SPACE_TO_DEPTH_H__ diff --git a/runtime/neurun/core/include/ir/operation/Split.h b/runtime/neurun/core/include/ir/operation/Split.h new file mode 100644 index 000000000..7a2749e84 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Split.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __NEURUN_IR_OPERATION_SPLIT_H__ +#define __NEURUN_IR_OPERATION_SPLIT_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ +class Split : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + int axis; + int num_splits; + int rank; + }; + +public: + Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Split; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; +} // namespace operation +} // namespace ir +} // namespace neurun +#endif // __NEURUN_IR_OPERATION_SPLIT_H__ diff --git a/runtime/neurun/core/include/ir/operation/SquaredDifference.h b/runtime/neurun/core/include/ir/operation/SquaredDifference.h new file mode 100644 index 000000000..46df419f5 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/SquaredDifference.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_SQUARED_DIFFERENCE_H__ +#define __NEURUN_IR_OPERATION_SQUARED_DIFFERENCE_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class SquaredDifference : public Operation +{ +public: + enum Input + { + LHS = 0, + RHS + }; + +public: + SquaredDifference(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::SquaredDifference; } +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_SQUARED_DIFFERENCE_H__ diff --git a/runtime/neurun/core/include/ir/operation/Squeeze.h b/runtime/neurun/core/include/ir/operation/Squeeze.h new file mode 100644 index 000000000..d27b315b5 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Squeeze.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_OPERATION_SQUEEZE_H__ +#define __NEURUN_IR_OPERATION_SQUEEZE_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Squeeze : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + // Please see tensorflow/lite/c/builtin_op_data.h and squeeze.cc. + // tensorflow lite supports only for ndim <= 8. 
+ int dims[8]; + int ndim; + }; + +public: + Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Squeeze; } + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_SQUEEZE_H__ diff --git a/runtime/neurun/core/include/ir/operation/StridedSlice.h b/runtime/neurun/core/include/ir/operation/StridedSlice.h new file mode 100644 index 000000000..868bda72c --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/StridedSlice.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_STRIDED_SLICE_H__ +#define __NEURUN_IR_OPERATION_STRIDED_SLICE_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class StridedSlice : public Operation +{ +public: + enum Input + { + INPUT = 0, + STARTS = 1, + ENDS = 2, + STRIDES = 3 + }; + + struct Param + { + std::int32_t begin_mask; + std::int32_t end_mask; + std::int32_t shrink_axis_mask; + int32_t rank; + }; + +public: + StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::StridedSlice; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_STRIDED_SLICE_H__ diff --git a/runtime/neurun/core/include/ir/operation/Sub.h b/runtime/neurun/core/include/ir/operation/Sub.h new file mode 100644 index 000000000..e5850af8c --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Sub.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_SUB_H__ +#define __NEURUN_IR_OPERATION_SUB_H__ + +#include "ir/Operation.h" +#include "ir/InternalType.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Sub : public Operation +{ +public: + enum Input + { + LHS = 0, + RHS + }; + + struct Param + { + Activation activation; + }; + +public: + Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Sub; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_SUB_H__ diff --git a/runtime/neurun/core/include/ir/operation/Tanh.h b/runtime/neurun/core/include/ir/operation/Tanh.h new file mode 100644 index 000000000..814ceec5a --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Tanh.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __NEURUN_IR_OPERATION_TANH_H__
+#define __NEURUN_IR_OPERATION_TANH_H__
+
+#include "ir/Operation.h"
+
+namespace neurun
+{
+namespace ir
+{
+namespace operation
+{
+
+/**
+ * @brief IR node for the Tanh operation.
+ *
+ * Carries no compile-time Param struct; the single operand is passed in the
+ * input OperandIndexSequence.
+ */
+class Tanh : public Operation
+{
+public:
+  // Role of the operand within the input OperandIndexSequence
+  enum Input
+  {
+    INPUT = 0
+  };
+
+public:
+  Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+  void accept(OperationVisitor &v) const override;
+  OpCode opcode() const final { return OpCode::Tanh; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace neurun
+
+#endif // __NEURUN_IR_OPERATION_TANH_H__
diff --git a/runtime/neurun/core/include/ir/operation/TopKV2.h b/runtime/neurun/core/include/ir/operation/TopKV2.h
new file mode 100644
index 000000000..a6971e843
--- /dev/null
+++ b/runtime/neurun/core/include/ir/operation/TopKV2.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __NEURUN_IR_OPERATION_TOPK_V2_H__ +#define __NEURUN_IR_OPERATION_TOPK_V2_H__ + +#include <memory> + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class TopKV2 : public Operation +{ +public: + enum Input + { + INPUT + }; + + enum Output + { + OUTPUT_VALUES = 0, + OUTPUT_INDICES, + }; + + struct Param + { + std::int32_t k; + }; + +public: + TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::TopKV2; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_TOPK_V2_H__ diff --git a/runtime/neurun/core/include/ir/operation/Transpose.h b/runtime/neurun/core/include/ir/operation/Transpose.h new file mode 100644 index 000000000..b1e08a506 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Transpose.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_TRANSPOSE_H__ +#define __NEURUN_IR_OPERATION_TRANSPOSE_H__ + +#include "ir/Operation.h" + +#include <utility> + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class Transpose : public Operation +{ +public: + enum Input + { + INPUT = 0, // for an n-D tensor, specifying the tensor to be transposed. + }; + + struct Param + { + std::vector<int> perm; + int32_t rank; + }; + +public: + Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Transpose; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_TRANSPOSE_H__ diff --git a/runtime/neurun/core/include/ir/operation/TransposeConv.h b/runtime/neurun/core/include/ir/operation/TransposeConv.h new file mode 100644 index 000000000..a561db4e4 --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/TransposeConv.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_OPERATION_TRANSPOSE_CONV_H__ +#define __NEURUN_IR_OPERATION_TRANSPOSE_CONV_H__ + +#include <memory> + +#include "ir/Operation.h" +#include "ir/InternalType.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +class TransposeConv : public Operation +{ +public: + enum Input + { + OUTPUT_SHAPE = 0, + KERNEL, + INPUT + }; + + struct Param + { + Padding padding; + Stride stride; + }; + +public: + TransposeConv(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::TransposeConv; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_OPERATION_TRANSPOSE_CONV_H__ diff --git a/runtime/neurun/core/include/ir/operation/Unpack.h b/runtime/neurun/core/include/ir/operation/Unpack.h new file mode 100644 index 000000000..fa698d3af --- /dev/null +++ b/runtime/neurun/core/include/ir/operation/Unpack.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __NEURUN_IR_OPERATION_UNPACK_H__ +#define __NEURUN_IR_OPERATION_UNPACK_H__ + +#include "ir/Operation.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ +class Unpack : public Operation +{ +public: + enum Input + { + INPUT = 0 + }; + + struct Param + { + int32_t num; + int32_t axis; + int32_t rank; + }; + +public: + Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Unpack; } + +public: + const Param ¶m() const { return _param; } + +private: + Param _param; +}; +} // namespace operation +} // namespace ir +} // namespace neurun +#endif // __NEURUN_IR_OPERATION_UNPACK_H__ diff --git a/runtime/neurun/core/include/util/Config.lst b/runtime/neurun/core/include/util/Config.lst new file mode 100644 index 000000000..046a0c4a8 --- /dev/null +++ b/runtime/neurun/core/include/util/Config.lst @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CONFIG +#error Define CONFIG before including this file +#endif + +// Name | Type | Default +CONFIG(GRAPH_DOT_DUMP , int , "0") +CONFIG(BACKENDS , std::string , "acl_cl;acl_neon;cpu;srcn") +CONFIG(OP_BACKEND_ALLOPS , std::string , "") +CONFIG(OP_BACKEND_MAP , std::string , "") +CONFIG(DISABLE_COMPILE , bool , "0") +CONFIG(NEURUN_LOG_ENABLE , bool , "0") +CONFIG(CPU_MEMORY_PLANNER , std::string , "WIC") +CONFIG(EXECUTOR , std::string , "Linear") +CONFIG(ACL_LAYOUT , std::string , "none") +CONFIG(NCNN_LAYOUT , std::string , "NCHW") +CONFIG(PROFILING_MODE , bool , "0") +CONFIG(USE_SCHEDULER , bool , "0") +CONFIG(SUBG_MAX_NODE , int , "0") +CONFIG(TRACE_FILEPATH , std::string , "") + +// Auto-generate all operations + +#define OP(InternalName) \ + CONFIG(OP_BACKEND_ ## InternalName, std::string, "") +#include "ir/Operations.lst" +#undef OP + diff --git a/runtime/neurun/core/include/util/ConfigSource.h b/runtime/neurun/core/include/util/ConfigSource.h new file mode 100644 index 000000000..b1fa9a87d --- /dev/null +++ b/runtime/neurun/core/include/util/ConfigSource.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_UTIL_CONFIG_SOURCE_H__ +#define __NEURUN_UTIL_CONFIG_SOURCE_H__ + +#include <memory> + +#include "IConfigSource.h" + +namespace neurun +{ +namespace util +{ + +void config_source(std::unique_ptr<IConfigSource> &&source); + +bool getConfigBool(const std::string &key); +int getConfigInt(const std::string &key); +std::string getConfigString(const std::string &key); + +} // namespace util +} // namespace neurun + +namespace neurun +{ +namespace util +{ +namespace config +{ + +#define CONFIG(Name, Type, Default) extern const char *Name; + +#include "Config.lst" + +#undef CONFIG + +} // namespace config +} // namespace util +} // namespace neurun + +#endif // __NEURUN_UTIL_CONFIG_SOURCE_H__ diff --git a/runtime/neurun/core/include/util/Coordinates.h b/runtime/neurun/core/include/util/Coordinates.h new file mode 100644 index 000000000..67947138f --- /dev/null +++ b/runtime/neurun/core/include/util/Coordinates.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_UTIL_COORDINATES_H__ +#define __NEURUN_UTIL_COORDINATES_H__ + +#include <cassert> +#include <stdint.h> +#include <vector> + +namespace neurun +{ +namespace util +{ + +/** + * @brief Class to represent position(offset) of tensor.\n + * Assume that the front is higher dimensional. + * i.g. 
N: 0, C: 1, H: 2, W: 3 for NCHW layout + */ +class Coordinates final +{ +public: + static constexpr size_t num_max_dimensions = 4; + +public: + /** + * @brief Construct a new Coordinates object + * @param[in] init The initialzer_list with coordinates + * @return + */ + Coordinates(std::initializer_list<int32_t> init) : _coordinates{init} + { + assert(init.size() <= num_max_dimensions); + } + +public: + /** + * @brief Set the coordinate of one of the coordinates. + * + * @param[in] dimension Dimension for which the coordinate is set. + * @param[in] Coordinate Coordinate to be set for the dimension. + */ + void set(size_t dimension, int32_t coordinate) + { + assert(dimension < num_max_dimensions); + if (dimension >= _coordinates.size()) + { + _coordinates.resize(dimension + 1, 0); + } + _coordinates[dimension] = coordinate; + } + +public: + /** + * @brief Return size of coordinates + * + * @return size of coordinates + */ + size_t size() const { return _coordinates.size(); } + +public: + int32_t operator[](size_t dimension) const + { + assert(dimension < _coordinates.size()); + return _coordinates[dimension]; + } + +public: + /** + * @brief begin() of const_iterator for this class + * + * @return The first iterator of the coordinates + */ + std::vector<int32_t>::const_iterator begin() const { return _coordinates.begin(); } + /** + * @brief end() of const_iterator for this class + * + * @return The last iterator of the coordinates + */ + std::vector<int32_t>::const_iterator end() const { return _coordinates.end(); } + +private: + std::vector<int32_t> _coordinates; +}; + +} // namespace util +} // namespace neurun + +#endif // __NEURUN_UTIL_COORDINATES_H__ diff --git a/runtime/neurun/core/include/util/EnvConfigSource.h b/runtime/neurun/core/include/util/EnvConfigSource.h new file mode 100644 index 000000000..77be15c4e --- /dev/null +++ b/runtime/neurun/core/include/util/EnvConfigSource.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_UTIL_ENV_CONFIG_SOURCE_H__ +#define __NEURUN_UTIL_ENV_CONFIG_SOURCE_H__ + +#include <unordered_map> + +#include "util/GeneralConfigSource.h" + +namespace neurun +{ +namespace util +{ + +class EnvConfigSource final : public GeneralConfigSource +{ +public: + std::string get(const std::string &key) const override; + +private: + std::unordered_map<std::string, std::string> _default_attributes; +}; + +} // namespace util +} // namespace neurun + +#endif // __NEURUN_UTIL_ENV_CONFIG_SOURCE_H__ diff --git a/runtime/neurun/core/include/util/EventCollectorGlobal.h b/runtime/neurun/core/include/util/EventCollectorGlobal.h new file mode 100644 index 000000000..15e40844e --- /dev/null +++ b/runtime/neurun/core/include/util/EventCollectorGlobal.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_UTIL_EVENT_COLLECTOR_GLOBAL_H__ +#define __NEURUN_UTIL_EVENT_COLLECTOR_GLOBAL_H__ + +#include "misc/EventRecorder.h" +#include "misc/EventCollector.h" + +namespace neurun +{ +namespace util +{ + +/** + * @brief Singleton class for event collection from anywhere in code + * + */ +class EventCollectorGlobal +{ +public: + /** + * @brief Get the singleton object of this class + * + * @return EventCollectorGlobal& Singleton object + */ + static EventCollectorGlobal &get(); + +public: + /** + * @brief Getter for event collector object + * + * @return EventCollector& Collector object + */ + EventCollector &collector() { return _collector; } + +private: + EventCollectorGlobal(); + ~EventCollectorGlobal(); + +private: + EventRecorder _recorder; + EventCollector _collector; +}; + +/** + * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor + * + */ +class EventDurationBlock +{ +public: + /** + * @brief Raise a duration event with type of BEGIN + * + * @param tag A label for the duration event + */ + EventDurationBlock(const std::string &tag); + /** + * @brief Raise a duration event with type of END + * + */ + ~EventDurationBlock(); + +private: + std::string _tag; +}; + +/** + * @brief Helper class for emitting duration event which is handled manually + * + * Usage: + * { + * ... + * EventDurationManual duration("some tag"); + * duration.begin(); + * ... + * ... // Code for duration + * ... 
+ * duration.end(); + * } + * + */ +class EventDurationManual +{ +public: + /** + * @brief Construct a new Event Duration Manual object + * + * @param tag A label for the duration object + */ + EventDurationManual(const std::string &tag); + /** + * @brief Destroy the Event Duration Manual object + * + */ + ~EventDurationManual(); + + /** + * @brief Raise a duration event with type of BEGIN + * + */ + void begin(); + /** + * @brief Raise a duration event with type of END + * + */ + void end(); + +private: + std::string _tag; + bool _pair; +}; + +} // namespace util +} // namespace neurun + +/** + * Helper Macro Definitions + * + * HOW TO USE + * + * void f(args) + * { + * EVENT_DURATION_FUNCTION(); + * ... + * if(cond) + * { + * EVENT_DURATION_REGION("if branch"); + * ... + * } + * ... + * } + */ + +#define EVENT_DURATION_FUNCTION() \ + ::neurun::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ } + +#define EVENT_DURATION_REGION(tag) \ + ::neurun::util::EventDurationBlock __event_duration__##__LINE__ { tag } + +#endif // __NEURUN_UTIL_EVENT_COLLECTOR_GLOBAL_H__ diff --git a/runtime/neurun/core/include/util/GeneralConfigSource.h b/runtime/neurun/core/include/util/GeneralConfigSource.h new file mode 100644 index 000000000..04e3332b3 --- /dev/null +++ b/runtime/neurun/core/include/util/GeneralConfigSource.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_UTIL_GLOBAL_CONFIG_SOURCE_H__ +#define __NEURUN_UTIL_GLOBAL_CONFIG_SOURCE_H__ + +#include <unordered_map> + +#include "util/IConfigSource.h" + +namespace neurun +{ +namespace util +{ + +class GeneralConfigSource : public IConfigSource +{ +public: + GeneralConfigSource() = default; + + std::string get(const std::string &key) const override; + void set(const std::string &key, const std::string &val); + +private: + std::unordered_map<std::string, std::string> _map; +}; + +} // namespace util +} // namespace neurun + +#endif // __NEURUN_UTIL_GLOBAL_CONFIG_SOURCE_H__ diff --git a/runtime/neurun/core/include/util/IConfigSource.h b/runtime/neurun/core/include/util/IConfigSource.h new file mode 100644 index 000000000..a52d87097 --- /dev/null +++ b/runtime/neurun/core/include/util/IConfigSource.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_UTIL_I_CONFIG_SOURCE_H__ +#define __NEURUN_UTIL_I_CONFIG_SOURCE_H__ + +#include <string> + +namespace neurun +{ +namespace util +{ + +struct IConfigSource +{ + /** + * @brief Destroy the IConfigSource object + */ + virtual ~IConfigSource() = default; + + /** + * @brief get the value for the matching key + * + * @param key string key to search + * @return string value associated with the key + */ + virtual std::string get(const std::string &key) const = 0; +}; + +} // namespace util +} // namespace neurun + +#endif // __NEURUN_UTIL_I_CONFIG_SOURCE_H__ diff --git a/runtime/neurun/core/include/util/ITimer.h b/runtime/neurun/core/include/util/ITimer.h new file mode 100644 index 000000000..79ecdd0ca --- /dev/null +++ b/runtime/neurun/core/include/util/ITimer.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __NEURUN_UTIL_ITIMER_H__
+#define __NEURUN_UTIL_ITIMER_H__
+
+#include <chrono>
+
+namespace neurun
+{
+namespace util
+{
+
+/**
+ * @brief Interface for measuring the elapsed time of a code region.
+ *
+ * Call handleBegin() before the region and handleEnd() after it, then read
+ * the measured duration with getTime().
+ */
+class ITimer
+{
+public:
+  virtual void handleBegin() = 0;
+  virtual void handleEnd() = 0;
+  // Returns the duration measured by the last handleBegin()/handleEnd() pair
+  // (microseconds for CPUTimer); 0 if no measurement has completed yet.
+  int getTime() { return _timer_res; };
+
+  virtual ~ITimer() = default;
+
+protected:
+  int _timer_res{0}; // last measured duration, set by handleEnd()
+};
+
+/**
+ * @brief Wall-clock timer based on std::chrono::steady_clock.
+ */
+class CPUTimer : public ITimer
+{
+public:
+  void handleBegin() override { _start_time = std::chrono::steady_clock::now(); };
+
+  void handleEnd() override
+  {
+    const auto end_time = std::chrono::steady_clock::now();
+    // Store the elapsed time in microseconds.
+    _timer_res =
+        std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
+  };
+
+private:
+  std::chrono::steady_clock::time_point _start_time; // timestamp captured by handleBegin()
+};
+
+} // namespace util
+} // namespace neurun
+
+#endif // __NEURUN_UTIL_ITIMER_H__
diff --git a/runtime/neurun/core/include/util/Index.h b/runtime/neurun/core/include/util/Index.h
new file mode 100644
index 000000000..bd8eeb38c
--- /dev/null
+++ b/runtime/neurun/core/include/util/Index.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
#ifndef __NEURUN_UTIL_INDEX_H__
#define __NEURUN_UTIL_INDEX_H__

#include <functional>
#include <limits>
#include <stdint.h>

namespace neurun
{
namespace util
{

/**
 * @brief A wrapper class for unsigned integral Index
 * NOTE : Max value of the underlying type is used as the invalid value
 *
 * @tparam T Underlying type. Must be unsigned integral type otherwise its behavior is undefined.
 * @tparam DummyTag Dummy type to distinguish types with a same underlying type. Using an opaque
 * type is recommended.
 */
template <typename T, typename DummyTag> class Index
{
private:
  // Sentinel marking an index that has not been assigned a value
  static const T kUndefined = std::numeric_limits<T>::max();

public:
  /**
   * @brief Construct an invalid (undefined) Index
   */
  explicit Index(void) : _value{kUndefined} {}
  /**
   * @brief Construct an Index holding the given underlying value
   *
   * @param o Value in the underlying type
   */
  explicit Index(const T o) : _value{o} {}
  /**
   * @brief Copy Constructor
   */
  Index(const Index &o) = default;

  /**
   * @brief Assign a raw value of the underlying type
   *
   * @param o Value in the underlying type
   * @return Index& Reference to this object
   */
  Index &operator=(const T o)
  {
    _value = o;
    return *this;
  }

  /**
   * @brief Copy assignment operator
   */
  Index &operator=(const Index &o) = default;

  /// @brief Compare against a raw underlying value for equality
  bool operator==(T o) const { return _value == o; }
  /// @brief Compare against another Index for equality
  bool operator==(const Index &o) const { return _value == o._value; }
  /// @brief Compare against a raw underlying value for inequality
  bool operator!=(T o) const { return _value != o; }
  /// @brief Compare against another Index for inequality
  bool operator!=(const Index &o) const { return _value != o._value; }

  /**
   * @brief Post increment operator
   *
   * @return Index Value held before the increment
   */
  Index operator++(int)
  {
    const Index before = *this;
    ++_value;
    return before;
  }

  /// @brief Whether this index holds a defined (non-sentinel) value
  bool valid() const { return _value != kUndefined; }
  /// @brief Raw underlying value
  T value() const { return _value; }

private:
  T _value;
};

} // namespace util
} // namespace neurun

namespace std
{

/// Hash support so Index can be used as a key in unordered containers
template <typename T, typename Tag> struct hash<::neurun::util::Index<T, Tag>>
{
  size_t operator()(const ::neurun::util::Index<T, Tag> &index) const noexcept
  {
    return std::hash<T>{}(index.value());
  }
};

} // namespace std

#endif // __NEURUN_UTIL_INDEX_H__
#ifndef __NEURUN_UTIL_OBJECT_MANAGER_H__
#define __NEURUN_UTIL_OBJECT_MANAGER_H__

// FIX: <functional> (std::function) and <list> (std::list) were used without
// being included; both are now included explicitly.
#include <cstdint>
#include <functional>
#include <list>
#include <memory>
#include <unordered_map>

namespace neurun
{
namespace util
{

/**
 * @brief Class that owns objects and maps them with indices as a handle for them
 *
 * Indices are generated from a monotonically increasing counter, so removed
 * indices are never reused.
 */
template <typename Index, typename Object> class ObjectManager
{
public:
  ObjectManager() : _index_count{0u} {}

public:
  /**
   * @brief Create an object with args and put it in the container with a new Index for that
   *
   * @param[in] args Arguments for constructing the Object
   * @return Created index that is associated to the object
   */
  template <class... Args> Index emplace(Args &&... args)
  {
    auto index = generateIndex();
    // FIX: std::make_unique (C++14) replaces nnfw::cpp14::make_unique, whose
    // header was never included here.
    _objects.emplace(index, std::make_unique<Object>(std::forward<Args>(args)...));
    return index;
  }

  /**
   * @brief Put object in the container with a new Index for that
   *
   * @param[in] object Object to be pushed
   * @return Created index that is associated to the object
   */
  Index push(std::unique_ptr<Object> &&object)
  {
    auto index = generateIndex();
    _objects.emplace(index, std::move(object));
    return index;
  }

  /**
   * @brief Remove the object that is associated with the given index
   *
   * @param[in] index Index of the object to be removed
   * @return N/A
   */
  void remove(const Index &index) { _objects.erase(index); }

  /**
   * @brief Get the object that is associated with the given index
   *
   * @param[in] index Index of the object to be returned
   * @return Object
   */
  const Object &at(const Index &index) const { return *(_objects.at(index)); }
  /**
   * @brief Get the object that is associated with the given index
   *
   * @param[in] index Index of the object to be returned
   * @return Object
   */
  Object &at(const Index &index) { return *(_objects.at(index)); }
  /**
   * @brief Check whether an object is associated with the given index
   *        (FIX: the brief previously duplicated at()'s description)
   *
   * @param[in] index Index to look up
   * @return true if such entry exists otherwise false
   */
  bool exist(const Index &index) const
  {
    auto it = _objects.find(index);
    return it != _objects.end();
  }
  /**
   * @brief Iterate over the container with given function
   *
   * @param[in] fn Function to be run for every container entry
   * @return N/A
   */
  void iterate(const std::function<void(const Index &, const Object &)> &fn) const
  {
    for (const auto &e : _objects)
    {
      fn(e.first, *e.second);
    }
  }
  /**
   * @brief Iterate over the container with given function
   *
   * @param[in] fn Function to be run for every container entry
   * @return N/A
   */
  void iterate(const std::function<void(const Index &, Object &)> &fn)
  {
    // TODO Remove this workaround
    // This implementation is a workaround in case of adding operands while iteration
    std::list<Index> l;

    for (auto &e : _objects)
    {
      l.push_back(e.first);
    }

    for (auto index : l)
    {
      // FIX: operator[] would default-insert a null unique_ptr (and then
      // dereference it) if fn removed this entry during iteration; skip
      // entries that no longer exist instead.
      auto it = _objects.find(index);
      if (it != _objects.end())
      {
        fn(index, *it->second);
      }
    }
  }

private:
  // New indices come from an ever-increasing counter, so they are unique
  Index generateIndex() { return Index{_index_count++}; }

private:
  std::unordered_map<Index, std::unique_ptr<Object>> _objects;
  uint32_t _index_count;
};

} // namespace util
} // namespace neurun

#endif // __NEURUN_UTIL_OBJECT_MANAGER_H__
#ifndef __NEURUN_UTIL_PADDING_H__
#define __NEURUN_UTIL_PADDING_H__

#include <stdint.h>

#include "ir/Shape.h"
#include "ir/InternalType.h"

namespace neurun
{
namespace util
{

/**
 * @brief Build an all-zero ExplicitPadding (the VALID padding scheme)
 */
ir::ExplicitPadding validPadding(void);
/**
 * @brief Compute per-side padding for the SAME scheme from input/output feature
 *        shapes, stride, and kernel size
 * @param[in] ifm_shape Input feature map shape
 * @param[in] ofm_shape Output feature map shape
 * @param[in] stride    Stride of the sliding window
 * @param[in] kw        Kernel width  (presumably — TODO confirm in definition)
 * @param[in] kh        Kernel height (presumably — TODO confirm in definition)
 */
ir::ExplicitPadding samePadding(const ir::FeatureShape &ifm_shape,
                                const ir::FeatureShape &ofm_shape, const ir::Stride &stride,
                                uint32_t kw, uint32_t kh);
/**
 * @brief Resolve any ir::Padding (explicit / SAME / VALID) into explicit
 *        per-side values for the given shapes, stride, and kernel size
 */
ir::ExplicitPadding calculatePadding(const ir::Padding &padding, const ir::FeatureShape &ifm_shape,
                                     const ir::FeatureShape &ofm_shape, const ir::Stride &stride,
                                     uint32_t kw, uint32_t kh);

} // namespace util
} // namespace neurun

#endif // __NEURUN_UTIL_PADDING_H__
/**
 * @file Set.h
 * @brief This file contains neurun::util::Set class
 * @ingroup COM_AI_RUNTIME
 */

#ifndef __NEURUN_UTIL_SET_H__
#define __NEURUN_UTIL_SET_H__

#include <cassert>
#include <cstdint> // FIX: uint32_t was used without an include for it
#include <unordered_set>

namespace neurun
{
namespace util
{

/**
 * @brief Class for set of custom element
 * @tparam Element Key type of Set
 */
template <typename Element> class Set
{
public:
  /**
   * @brief Construct default Set object.
   */
  Set() = default;
  /**
   * @brief Construct Set object by copy semantics.
   */
  Set(const Set<Element> &) = default;
  /**
   * @brief Construct move Set object by move semantics.
   */
  Set(Set<Element> &&) = default;

public:
  /**
   * @brief Add a given element to the set
   *
   * @param e Element added
   */
  void add(const Element &e) { _set.insert(e); }
  /**
   * @brief remove a given element from the set
   *
   * @param e Element removed
   */
  void remove(const Element &e) { _set.erase(e); }
  /**
   * @brief Get size of the set
   *
   * @return The size of the set
   */
  uint32_t size() const { return static_cast<uint32_t>(_set.size()); }
  /**
   * @brief Get whether the set is empty
   *
   * @return Whether the set is empty
   */
  bool empty() const { return _set.empty(); }
  /**
   * @brief Get whether a given element exists in the set
   *
   * @param e A given element
   *
   * @return Whether a given element exists in the set
   */
  bool contains(const Element &e) const { return _set.find(e) != _set.end(); }
  /**
   * @brief Get the only element of a single-element set (asserts size == 1)
   *
   * @return The sole element of the set
   */
  const Element &getOnlyElement() const
  {
    assert(_set.size() == 1u);
    return *_set.begin();
  }

public:
  /**
   * @brief operator overloading function for `|`
   *
   * @return A set with two sets combined
   */
  Set<Element> operator|(const Set<Element> &other) const // Union
  {
    auto ret = *this;
    // FIX: iterate by const reference to avoid copying each element
    for (const auto &e : other)
    {
      ret.add(e);
    }
    return ret;
  }
  /**
   * @brief operator overloading function for `&`
   *
   * @return A set of elements that overlap in two sets
   */
  Set<Element> operator&(const Set<Element> &other) const // Intersect
  {
    Set<Element> ret;
    for (const auto &e : other)
    {
      if (contains(e))
      {
        ret.add(e);
      }
    }
    return ret;
  }
  /**
   * @brief operator overloading function for `-`
   *
   * @return A set of subtracted from another set
   */
  Set<Element> operator-(const Set<Element> &other) const // Minus
  {
    auto ret = *this;
    for (const auto &e : other)
    {
      ret.remove(e);
    }
    return ret;
  }

public:
  /**
   * @brief begin() of const_iterator for this class
   *
   * @return The first iterator of the set
   */
  typename std::unordered_set<Element>::const_iterator begin() const { return _set.begin(); }
  /**
   * @brief end() of const_iterator for this class
   *
   * @return The last iterator of the set
   */
  typename std::unordered_set<Element>::const_iterator end() const { return _set.end(); }

private:
  std::unordered_set<Element> _set;
};

} // namespace util
} // namespace neurun

#endif // __NEURUN_UTIL_SET_H__
+ */ + +#ifndef __NEURUN_GRAPH_SHAPE_INFERENCE_H__ +#define __NEURUN_GRAPH_SHAPE_INFERENCE_H__ + +#include "ir/operation/AvgPool2D.h" +#include "ir/operation/Concat.h" +#include "ir/operation/MaxPool2D.h" +#include "ir/operation/Conv2D.h" +#include "ir/operation/DepthwiseConv2D.h" +#include "ir/Operands.h" +#include "ir/Index.h" +#include "ir/Layout.h" + +namespace neurun +{ +namespace shape_inference +{ + +using Shapes = std::vector<ir::Shape>; + +Shapes inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape); + +Shapes inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param ¶m, + ir::Layout layout = ir::Layout::NHWC); + +Shapes inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat::Param ¶m); + +Shapes inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param ¶m, + ir::Layout layout = ir::Layout::NHWC); + +Shapes inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape, + const ir::operation::Conv2D::Param ¶m, + ir::Layout layout = ir::Layout::NHWC); + +Shapes inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape, + const ir::operation::DepthwiseConv2D::Param ¶m, + ir::Layout layout = ir::Layout::NHWC); + +Shapes inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape); + +} // namespace shape_inference +} // namespace neurun + +#endif // __NEURUN_GRAPH_SHAPE_INFERENCE_H__ diff --git a/runtime/neurun/core/include/util/Utils.h b/runtime/neurun/core/include/util/Utils.h new file mode 100644 index 000000000..e7468dabd --- /dev/null +++ b/runtime/neurun/core/include/util/Utils.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/**
 * @file Utils.h
 * @brief This file contains utility functions
 * @ingroup COM_AI_RUNTIME
 */

#ifndef __NEURUN_UTIL_UTILS_H__
#define __NEURUN_UTIL_UTILS_H__

#include "ir/InternalType.h"
#include "ir/Layout.h"
#include "ir/Operand.h"
#include "util/Coordinates.h"

// Silences "unused variable" warnings in release builds, where the variable
// is typically consumed only by an assert()
#define UNUSED_RELEASE(a) (void)(a)

namespace neurun
{
namespace util
{

/**
 * @brief Converts a internal padding type to const char*
 * @param[in] type Padding type to be converted
 * @return A string holding the converted value
 */
const char *to_string(ir::PaddingType type);

/**
 * @brief Convert coordinates expressed in one layout to another
 * @param[in] from_coordinates Coordinates in the source layout
 * @param[in] from_layout Layout the coordinates are currently expressed in
 * @param[in] to_layout Target layout
 * @return Coordinates re-expressed in the target layout
 *         (NOTE(review): presumably an NHWC<->NCHW axis permutation — confirm
 *         against the definition in the .cc file)
 */
Coordinates convertCoordinates(const Coordinates &from_coordinates, ir::Layout from_layout,
                               ir::Layout to_layout);

} // namespace util
} // namespace neurun

#endif // __NEURUN_UTIL_UTILS_H__
#ifndef __NEURUN_UTIL_FEATURE_COORDINATE_4D_H__
#define __NEURUN_UTIL_FEATURE_COORDINATE_4D_H__

#include <stdint.h>

namespace neurun
{
namespace util
{
namespace feature
{

/**
 * @brief Class to represent position(offset) of subtensor.\n
 *        Assume that parent and child are already lowered (can get Shape4D).
 */
class Coordinate4D
{
public:
  /// @brief Construct a Coordinate4D at the origin (all offsets zero)
  Coordinate4D(void) : Coordinate4D{0, 0, 0, 0}
  {
    // Delegates to the four-argument constructor
  }
  /**
   * @brief Construct a Coordinate4D with explicit offsets
   * @param[in] n Batch offset
   * @param[in] h Height offset
   * @param[in] w Width offset
   * @param[in] c Channel offset
   */
  Coordinate4D(int32_t n, int32_t h, int32_t w, int32_t c) : _n{n}, _h{h}, _w{w}, _c{c}
  {
    // DO NOTHING
  }

public:
  /// @brief Set batch offset
  void n(int32_t n) { _n = n; }
  /// @brief Set height offset
  void h(int32_t h) { _h = h; }
  /// @brief Set width offset
  void w(int32_t w) { _w = w; }
  /// @brief Set channel offset
  void c(int32_t c) { _c = c; }

public:
  /// @brief Return batch offset
  int32_t n(void) const { return _n; }
  /// @brief Return height offset
  int32_t h(void) const { return _h; }
  /// @brief Return width offset
  int32_t w(void) const { return _w; }
  /// @brief Return channel offset
  int32_t c(void) const { return _c; }

private:
  int32_t _n;
  int32_t _h;
  int32_t _w;
  int32_t _c;
};

} // namespace feature
} // namespace util
} // namespace neurun

#endif // __NEURUN_UTIL_FEATURE_COORDINATE_4D_H__
#ifndef __NEURUN_UTIL_FEATURE_NCHW_READER_H__
#define __NEURUN_UTIL_FEATURE_NCHW_READER_H__

#include <cassert>

#include "backend/operand/ITensor.h"
#include "misc/feature/Reader.h"
#include "misc/feature/Shape.h"
#include "util/Coordinates.h"
#include "util/Utils.h"

namespace neurun
{
namespace util
{
namespace feature
{
namespace nchw
{

/**
 * @brief Read-only view over an NCHW feature buffer, addressed by
 *        (batch, channel, row, col). Byte strides per axis are precomputed
 *        in the constructors.
 */
template <typename T> class Reader final : public nnfw::misc::feature::Reader<T>
{
public:
  // Construct for buffer of model inputs
  // Assumes a densely packed NCHW buffer of exactly N*C*H*W elements of T.
  Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len)
      : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
  {
    assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);

    // No padding
    _strides.W = sizeof(T);
    _strides.H = shape.W * sizeof(T);
    _strides.C = shape.W * shape.H * sizeof(T);
    _strides.N = shape.W * shape.H * shape.C * sizeof(T);
  }

  // Construct for backend tensor
  // Strides are probed by asking the tensor for the byte offset of the element
  // one step along each axis; a dimension of extent 1 gets stride 0 (it is
  // never advanced, so the value does not matter).
  // NOTE(review): assumes ITensor::calcOffset returns a byte offset — confirm.
  Reader(backend::operand::ITensor *tensor)
      : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
  {
    assert(tensor->layout() == ir::Layout::NCHW);

    const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
    _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
    _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
    _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
    _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;

    _shape.W = tensor->dimension(3);
    _shape.H = tensor->dimension(2);
    _shape.C = tensor->dimension(1);
    _shape.N = tensor->dimension(0);
  }

public:
  // Element access for batch 0
  T at(uint32_t ch, uint32_t row, uint32_t col) const override
  {
    const auto offset = feature_index_to_byte_offset(0, ch, row, col);

    const T *ptr = reinterpret_cast<const T *>(_ptr + offset);

    return *ptr;
  }
  // Element access with an explicit batch index
  T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
  {
    const auto offset = feature_index_to_byte_offset(batch, ch, row, col);

    const T *ptr = reinterpret_cast<const T *>(_ptr + offset);

    return *ptr;
  }

private:
  // Dot product of the index with the per-axis byte strides.
  // NOTE(review): accumulates in uint32_t before widening to size_t — would
  // overflow for tensors larger than 4 GiB.
  size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
  {
    assert(1u * _shape.N > batch); // shape.N > batch
    assert(1u * _shape.C > ch);    // shape.C > ch
    assert(1u * _shape.H > row);   // shape.H > row
    assert(1u * _shape.W > col);   // shape.W > col

    uint32_t res = 0;
    res += batch * _strides.N;
    res += ch * _strides.C;
    res += row * _strides.H;
    res += col * _strides.W;

    return res;
  }

private:
  // TODO Remove _shape
  nnfw::misc::feature::Shape _shape; // extents, used only for the bounds asserts
  using Strides = nnfw::misc::feature::Shape;
  Strides _strides;   // byte stride per axis (Shape reused as a stride holder)
  const uint8_t *_ptr; // base address of element (0,0,0,0)
  size_t _len;        // total byte length of the underlying buffer
};

} // namespace nchw
} // namespace feature
} // namespace util
} // namespace neurun

#endif // __NEURUN_UTIL_FEATURE_NCHW_READER_H__
#ifndef __NEURUN_UTIL_FEATURE_NCHW_VIEW_H__
#define __NEURUN_UTIL_FEATURE_NCHW_VIEW_H__

#include "misc/feature/Reader.h"
#include "misc/feature/Shape.h"

#include "backend/operand/ITensor.h"
#include "util/Coordinates.h"
#include "util/Utils.h"

#include <cassert>

namespace neurun
{
namespace util
{
namespace feature
{
namespace nchw
{

/**
 * @brief Mutable view over an NCHW feature buffer, addressed by
 *        (batch, channel, row, col). Same layout logic as nchw::Reader but the
 *        non-const at() overloads return writable references.
 */
template <typename T> class View final : public nnfw::misc::feature::Reader<T>
{
public:
  // Construct for buffer of model inputs
  // Assumes a densely packed NCHW buffer of exactly N*C*H*W elements of T.
  View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len)
      : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
  {
    assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);

    _strides.W = sizeof(T);
    _strides.H = shape.W * sizeof(T);
    _strides.C = shape.W * shape.H * sizeof(T);
    _strides.N = shape.W * shape.H * shape.C * sizeof(T);
  }

  // Construct for backend tensor
  // Strides are probed via calcOffset of the element one step along each axis;
  // a dimension of extent 1 gets stride 0 (never advanced).
  // NOTE(review): assumes ITensor::calcOffset returns a byte offset — confirm.
  View(::neurun::backend::operand::ITensor *tensor)
      : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
  {
    assert(tensor->layout() == ir::Layout::NCHW);

    const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
    _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
    _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
    _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
    _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;

    _shape.W = tensor->dimension(3);
    _shape.H = tensor->dimension(2);
    _shape.C = tensor->dimension(1);
    _shape.N = tensor->dimension(0);
  }

public:
  // Read-only access, batch 0
  T at(uint32_t ch, uint32_t row, uint32_t col) const override
  {
    const auto offset = feature_index_to_byte_offset(0, ch, row, col);

    T *ptr = reinterpret_cast<T *>(_ptr + offset);

    return *ptr;
  }
  // Read-only access with explicit batch index
  T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
  {
    const auto offset = feature_index_to_byte_offset(batch, ch, row, col);

    T *ptr = reinterpret_cast<T *>(_ptr + offset);

    return *ptr;
  }

public:
  // Writable access, batch 0
  T &at(uint32_t ch, uint32_t row, uint32_t col)
  {
    const auto offset = feature_index_to_byte_offset(0, ch, row, col);

    T *ptr = reinterpret_cast<T *>(_ptr + offset);

    return *ptr;
  }
  // Writable access with explicit batch index
  T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
  {
    const auto offset = feature_index_to_byte_offset(batch, ch, row, col);

    T *ptr = reinterpret_cast<T *>(_ptr + offset);

    return *ptr;
  }

private:
  // Dot product of the index with the per-axis byte strides.
  // NOTE(review): accumulates in uint32_t before widening to size_t — would
  // overflow for tensors larger than 4 GiB.
  size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
  {
    assert(1u * _shape.N > batch); // shape.N > batch
    assert(1u * _shape.C > ch);    // shape.C > ch
    assert(1u * _shape.H > row);   // shape.H > row
    assert(1u * _shape.W > col);   // shape.W > col

    uint32_t res = 0;
    res += batch * _strides.N;
    res += ch * _strides.C;
    res += row * _strides.H;
    res += col * _strides.W;

    return res;
  }

private:
  // TODO Remove _shape
  nnfw::misc::feature::Shape _shape; // extents, used only for the bounds asserts
  using Strides = nnfw::misc::feature::Shape;
  Strides _strides; // byte stride per axis (Shape reused as a stride holder)
  uint8_t *_ptr;    // base address of element (0,0,0,0)
  size_t _len;      // total byte length of the underlying buffer
};

} // namespace nchw
} // namespace feature
} // namespace util
} // namespace neurun

#endif // __NEURUN_UTIL_FEATURE_NCHW_VIEW_H__
#ifndef __NEURUN_UTIL_FEATURE_NHWC_READER_H__
#define __NEURUN_UTIL_FEATURE_NHWC_READER_H__

#include <cassert>

#include "backend/operand/ITensor.h"
#include "misc/feature/Reader.h"
#include "misc/feature/Shape.h"
#include "util/Coordinates.h"
#include "util/Utils.h"

namespace neurun
{
namespace util
{
namespace feature
{
namespace nhwc
{

/**
 * @brief Read-only view over an NHWC feature buffer, addressed by
 *        (batch, row, col, channel). Byte strides per axis are precomputed
 *        in the constructors.
 */
template <typename T> class Reader final : public nnfw::misc::feature::Reader<T>
{
public:
  // Construct for buffer of model inputs
  // Assumes a densely packed NHWC buffer of exactly N*H*W*C elements of T.
  Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len)
      : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
  {
    UNUSED_RELEASE(len); // Workaround for unused variable in release mode
    assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);

    // No padding
    _strides.C = sizeof(T);
    _strides.W = shape.C * sizeof(T);
    _strides.H = shape.C * shape.W * sizeof(T);
    _strides.N = shape.C * shape.W * shape.H * sizeof(T);
  }

  // Construct for backend tensor
  // Strides are probed via calcOffset of the element one step along each axis;
  // a dimension of extent 1 gets stride 0 (never advanced).
  // NOTE(review): assumes ITensor::calcOffset returns a byte offset — confirm.
  Reader(const backend::operand::ITensor *tensor)
      : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
  {
    assert(tensor->layout() == ir::Layout::NHWC);

    const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
    _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
    _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
    _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
    _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;

    _shape.C = tensor->dimension(3);
    _shape.W = tensor->dimension(2);
    _shape.H = tensor->dimension(1);
    _shape.N = tensor->dimension(0);
  }

public:
  // Element access for batch 0
  T at(uint32_t row, uint32_t col, uint32_t ch) const override
  {
    const auto offset = feature_index_to_byte_offset(0, row, col, ch);

    const T *ptr = reinterpret_cast<const T *>(_ptr + offset);

    return *ptr;
  }
  // Element access with an explicit batch index
  T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
  {
    const auto offset = feature_index_to_byte_offset(batch, row, col, ch);

    const T *ptr = reinterpret_cast<const T *>(_ptr + offset);

    return *ptr;
  }

private:
  // Dot product of the index with the per-axis byte strides.
  // NOTE(review): accumulates in uint32_t before widening to size_t — would
  // overflow for tensors larger than 4 GiB.
  size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
  {
    assert(1u * _shape.N > batch); // shape.N > batch
    assert(1u * _shape.H > row);   // shape.H > row
    assert(1u * _shape.W > col);   // shape.W > col
    assert(1u * _shape.C > ch);    // shape.C > ch

    uint32_t res = 0;
    res += batch * _strides.N;
    res += row * _strides.H;
    res += col * _strides.W;
    res += ch * _strides.C;

    return res;
  }

private:
  // TODO Remove _shape
  nnfw::misc::feature::Shape _shape; // extents, used only for the bounds asserts
  using Strides = nnfw::misc::feature::Shape;
  Strides _strides;    // byte stride per axis (Shape reused as a stride holder)
  const uint8_t *_ptr; // base address of element (0,0,0,0)
  size_t _len;         // total byte length of the underlying buffer
};

} // namespace nhwc
} // namespace feature
} // namespace util
} // namespace neurun

#endif // __NEURUN_UTIL_FEATURE_NHWC_READER_H__
000000000..b9d98e9fa --- /dev/null +++ b/runtime/neurun/core/include/util/feature/nhwc/View.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_UTIL_FEATURE_NHWC_VIEW_H__ +#define __NEURUN_UTIL_FEATURE_NHWC_VIEW_H__ + +#include <cassert> +#include <cstddef> + +#include "backend/operand/ITensor.h" +#include "misc/feature/Reader.h" +#include "misc/feature/Shape.h" +#include "util/Coordinates.h" +#include "util/Utils.h" + +namespace neurun +{ +namespace util +{ +namespace feature +{ +namespace nhwc +{ + +template <typename T> class View final : public nnfw::misc::feature::Reader<T> +{ +public: + // Construct for buffer of model inputs + View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len) + : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len} + { + UNUSED_RELEASE(len); // Workaround for unused variable in release mode + assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len); + + // No padding + _strides.C = sizeof(T); + _strides.W = shape.C * sizeof(T); + _strides.H = shape.C * shape.W * sizeof(T); + _strides.N = shape.C * shape.W * shape.H * sizeof(T); + } + + // Construct for backend tensor + View(backend::operand::ITensor *tensor) + : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} + { + assert(tensor->layout() == ir::Layout::NHWC); + + const auto start_offset = 
tensor->calcOffset({0, 0, 0, 0}); + _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; + _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; + _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; + _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; + + _shape.C = tensor->dimension(3); + _shape.W = tensor->dimension(2); + _shape.H = tensor->dimension(1); + _shape.N = tensor->dimension(0); + } + +public: + T at(uint32_t row, uint32_t col, uint32_t ch) const override + { + const auto offset = feature_index_to_byte_offset(0, row, col, ch); + + const T *ptr = reinterpret_cast<const T *>(_ptr + offset); + + return *ptr; + } + T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override + { + const auto offset = feature_index_to_byte_offset(batch, row, col, ch); + + const T *ptr = reinterpret_cast<const T *>(_ptr + offset); + + return *ptr; + } + + T &at(uint32_t row, uint32_t col, uint32_t ch) + { + const auto offset = feature_index_to_byte_offset(0, row, col, ch); + + T *ptr = reinterpret_cast<T *>(_ptr + offset); + + return *ptr; + } + + T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) + { + const auto offset = feature_index_to_byte_offset(batch, row, col, ch); + + T *ptr = reinterpret_cast<T *>(_ptr + offset); + + return *ptr; + } + +private: + size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const + { + assert(1u * _shape.N > batch); // shape.N > batch + assert(1u * _shape.H > row); // shape.H > row + assert(1u * _shape.W > col); // shape.W > col + assert(1u * _shape.C > ch); // shape.C > ch + + uint32_t res = 0; + res += batch * _strides.N; + res += row * _strides.H; + res += col * _strides.W; + res += ch * _strides.C; + + return res; + } + +private: + // TODO Remove _shape + nnfw::misc::feature::Shape _shape; + using Strides = 
nnfw::misc::feature::Shape; + Strides _strides; + uint8_t *_ptr; + size_t _len; +}; + +} // namespace nhwc +} // namespace feature +} // namespace util +} // namespace neurun + +#endif // __NEURUN_UTIL_FEATURE_NHWC_VIEW_H__ diff --git a/runtime/neurun/core/include/util/logging.h b/runtime/neurun/core/include/util/logging.h new file mode 100644 index 000000000..8ecd0ac12 --- /dev/null +++ b/runtime/neurun/core/include/util/logging.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_UTIL_LOGGING_H__ +#define __NEURUN_UTIL_LOGGING_H__ + +#include <iostream> + +#include "util/ConfigSource.h" + +namespace neurun +{ +namespace util +{ +namespace logging +{ + +class Context +{ +public: + Context() noexcept : _enabled{false} + { + const auto env = util::getConfigBool(util::config::NEURUN_LOG_ENABLE); + + if (env) + { + _enabled = true; + } + } + + static Context &get() noexcept; + +public: + bool enabled(void) const { return _enabled; } + +private: + bool _enabled; +}; + +static Context &ctx = Context::get(); + +} // namespace logging +} // namespace util +} // namespace neurun + +#define VERBOSE(name) \ + if (::neurun::util::logging::ctx.enabled()) \ + std::cout << "[" << #name << "] " + +#endif // __NEURUN_UTIL_LOGGING_H__ diff --git a/runtime/neurun/core/src/backend/Backend.cc b/runtime/neurun/core/src/backend/Backend.cc new file mode 100644 index 000000000..c2f745f8f --- /dev/null +++ b/runtime/neurun/core/src/backend/Backend.cc @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/Backend.h" + +#include "backend/IConfig.h" +#include "backend/ITensorBuilder.h" +#include "backend/IKernelGenerator.h" +#include "backend/IShapeFixer.h" + +namespace neurun +{ +namespace backend +{ + +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/core/src/backend/BackendManager.cc b/runtime/neurun/core/src/backend/BackendManager.cc new file mode 100644 index 000000000..32086e8b6 --- /dev/null +++ b/runtime/neurun/core/src/backend/BackendManager.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <dlfcn.h> +#include "BackendManager.h" + +#include "backend/Backend.h" +#include "backend/IConfig.h" +#include "util/logging.h" +#include "util/ConfigSource.h" +#include "misc/string_helpers.h" + +namespace neurun +{ +namespace backend +{ + +BackendManager &BackendManager::get() +{ + static BackendManager object; + return object; +} + +template <typename T, class... Types> +void BackendManager::loadObjectFromPlugin(std::shared_ptr<T> &object_of_plugin_class, + const std::string obj_creator_func_name, void *handle, + Types &&... args) +{ + T *(*allocate_obj)(Types && ... Args); + // load object creator function + allocate_obj = (T * (*)(Types && ... 
Args))dlsym(handle, obj_creator_func_name.c_str()); + if (allocate_obj == nullptr) + { + fprintf(stderr, "BackendManager: unable to open function %s: %s\n", + obj_creator_func_name.c_str(), dlerror()); + abort(); + } + + object_of_plugin_class.reset(allocate_obj(args...)); +} + +void BackendManager::loadBackend(const std::string &backend) +{ + const std::string backend_plugin = "libbackend_" + backend + ".so"; + void *handle = dlopen(backend_plugin.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) + { + VERBOSE(BackendManager::loadBackend) << "loadBackend failed to load plugin of " + << backend.c_str() << " backend: " << dlerror() + << std::endl; + return; + } + + VERBOSE(BackendManager::loadBackend) << "loaded " << backend_plugin << " as a plugin of " + << backend << " backend\n"; + + { + // load object creator function + auto backend_create = (backend_create_t)dlsym(handle, "neurun_backend_create"); + if (backend_create == nullptr) + { + fprintf(stderr, "BackendManager: unable to open function neurun_backend_create : %s\n", + dlerror()); + abort(); + } + + // load object creator function + auto backend_destroy = (backend_destroy_t)dlsym(handle, "neurun_backend_destroy"); + if (backend_destroy == nullptr) + { + fprintf(stderr, "BackendManager: unable to open function neurun_backend_destroy : %s\n", + dlerror()); + abort(); + } + + auto backend_object = + std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy); + auto backend_object_raw = backend_object.get(); + bool initialized = backend_object->config()->initialize(); // Call initialize here? + if (!initialized) + { + VERBOSE(BackendManager::loadBackend) + << backend.c_str() << " backend initialization failed. 
Don't use this backend" + << std::endl; + dlclose(handle); + return; + } + _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); + _available_backends.push_back(backend_object_raw); + } + + // Save backend handle (avoid warning by handle lost without dlclose()) + auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }}; + _handle_map.emplace(backend, std::move(u_handle)); +} + +BackendManager::BackendManager() +{ + const auto backends = util::getConfigString(util::config::BACKENDS); + for (auto &backend_id : nnfw::misc::split(backends, ';')) + { + loadBackend(backend_id); + } + + // No loaded backend + if (_available_backends.empty()) + { + VERBOSE(BackendManager::loadBackend) << "There is no loaded backend\n"; + abort(); + } +} + +Backend *BackendManager::get(const std::string &key) +{ + if (_gen_map.find(key) != _gen_map.end()) + { + return _gen_map.at(key).get(); + } + + return nullptr; +} + +const Backend *BackendManager::get(const std::string &key) const +{ + if (_gen_map.find(key) != _gen_map.end()) + { + return _gen_map.at(key).get(); + } + + return nullptr; +} + +const Backend *BackendManager::getDefault() const { return get("cpu"); } + +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/core/src/backend/BackendManager.h b/runtime/neurun/core/src/backend/BackendManager.h new file mode 100644 index 000000000..9c6483f9d --- /dev/null +++ b/runtime/neurun/core/src/backend/BackendManager.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_BACKEND_MANAGER_H__ +#define __NEURUN_BACKEND_BACKEND_MANAGER_H__ + +#include <memory> +#include <map> + +#include "ir/Operands.h" +#include "backend/Backend.h" + +namespace neurun +{ +namespace backend +{ + +class BackendManager +{ +public: + using backend_create_t = Backend *(*)(); + using backend_destroy_t = void (*)(Backend *); + using dlhandle_destroy_t = void (*)(void *); + + static BackendManager &get(); + +public: + Backend *get(const std::string &key); + const Backend *get(const std::string &key) const; + const Backend *getDefault() const; + const std::vector<const Backend *> &getAll() const { return _available_backends; }; + +private: + BackendManager(); + +private: + std::vector<const Backend *> _available_backends; + std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map; + std::map<std::string, std::unique_ptr<Backend, backend_destroy_t>> _gen_map; + /** + * @brief Allocate an object of a class of a plugin by loading a plugin function, that does + * allocation, and calling it + * + * @param object_of_plugin_class target object + * @param obj_creator_func_name name of the plugin function, that allocates an object + * @param handle handle of the plugin + * @param args arguments to pass to constructor of the plugin class + * + * @return + */ + template <typename T, class... Types> + void loadObjectFromPlugin(std::shared_ptr<T> &object_of_plugin_class, + const std::string obj_creator_func_name, void *handle, + Types &&... 
args); + + /** + * @brief load backend plugin + * + * @param backend backend to be loaded + * + * @return + */ + void loadBackend(const std::string &backend); +}; + +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_BACKEND_MANAGER_H__ diff --git a/runtime/neurun/core/src/backend/ExecTime.cc b/runtime/neurun/core/src/backend/ExecTime.cc new file mode 100644 index 000000000..d5aa679d7 --- /dev/null +++ b/runtime/neurun/core/src/backend/ExecTime.cc @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/ExecTime.h" + +#include <fstream> +#include <cassert> +#include <limits> +#include <algorithm> + +namespace neurun +{ +namespace backend +{ + +int64_t ExecTime::getOperationExecTime(const Backend *backend, const std::string &operation, + bool quant, uint32_t op_size) const +{ + auto found_backend = _measurements.find(backend); + if (found_backend == _measurements.end()) + return NOT_FOUND; // no execution time for this backend + + auto found_operation_with_type = found_backend->second.find(operation); + if (found_operation_with_type == found_backend->second.end()) + // no execution time for this operation + return NOT_FOUND; + + auto found_operation = found_operation_with_type->second.find(quant); + if (found_operation == found_operation_with_type->second.end()) + // no execution time for this operation + return NOT_FOUND; + + auto found_size = found_operation->second.find(op_size); + if (found_size != found_operation->second.end()) + return found_size->second; // found execution time + + // Try to interpolate + if (found_operation->second.size() < 2) + // not possible to do linear interpolation + return found_operation->second.begin()->second; + + // if we reach here, then this means, that there is no record, that is equal to op_size + auto upper_bound = found_operation->second.upper_bound(op_size); // > op_size + auto lower_bound = upper_bound; + + if (upper_bound == found_operation->second.end()) // all values <= op_size + { + upper_bound--; + lower_bound = upper_bound; + lower_bound--; + } + else if (upper_bound == found_operation->second.begin()) // all values > op_size + { + upper_bound++; + } + else // op_size between + { + lower_bound--; + } + + // Linear interpolation + const auto x0 = static_cast<int64_t>(lower_bound->first); // size + const auto x1 = static_cast<int64_t>(upper_bound->first); // size + const int64_t y0 = lower_bound->second; // time + const int64_t y1 = upper_bound->second; // time + const auto x = 
static_cast<int64_t>(op_size); + + int64_t interpolated_value = y0 + (x - x0) * (y1 - y0) / (x1 - x0); + + // In some cases ops with smaller inputs is executed slower than the one + // with larger inputs, more likely because of a backend's load difference + if (interpolated_value < 0 && x > x1) + { + return y0; + } + // It must be non-positive ONLY if it's lesser than both of them + assert(interpolated_value > 0 || x < x0); + + // execution time must be non-negative + return std::max<int64_t>(interpolated_value, 1); +} + +void ExecTime::updateOperationExecTime(const Backend *backend, const std::string &operation, + bool quant, uint32_t op_size, int64_t time) +{ + // If the op is not implemented for some input, it should not be scheduled + const auto &recs = _measurements[backend][operation][quant]; + if (time == getMax() || + std::any_of(recs.begin(), recs.end(), + [](std::pair<const uint32_t, const int64_t> p) { return p.second == getMax(); })) + { + _measurements[backend][operation][quant].clear(); + _measurements[backend][operation][quant].emplace(op_size, getMax()); + } + else + { + auto it = _measurements[backend][operation][quant].emplace(op_size, time); + if (!it.second) + { + // affect of the last measurement is bigger than the previous ones: + // this prefers new metrics than older once, so will adapt backend changes + it.first->second = (it.first->second + time) / 2; + } + } +} + +void ExecTime::updatePermuteTime(const Backend *from_backend, const Backend *to_backend, bool quant, + uint32_t op_size, int64_t time) +{ + updateOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size, time); +} + +int64_t ExecTime::getPermuteTime(const Backend *from_backend, const Backend *to_backend, bool quant, + uint32_t op_size) const +{ + return getOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size); +} + +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/core/src/backend/JSONExecTime.cc 
b/runtime/neurun/core/src/backend/JSONExecTime.cc new file mode 100644 index 000000000..e2404b2c8 --- /dev/null +++ b/runtime/neurun/core/src/backend/JSONExecTime.cc @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/JSONExecTime.h" +#include "backend/IConfig.h" +#include <fstream> + +namespace neurun +{ +namespace backend +{ +/** + * @brief Helper function for reading string from stream + * + * @param str Output string + * @param stream File stream + */ +void readString(std::string &str, std::ifstream &stream) +{ + str.clear(); + char buf; + while (stream.good()) + { + stream.get(buf); + if (buf == '"') + break; + str.push_back(buf); + } +} + +/** + * @brief Helper function for reading bool from stream + * + * @param quant Output bool + * @param stream File stream + */ +void readBool(bool &quant, std::ifstream &stream) +{ + char buf; + stream.get(buf); + quant = (buf == '1'); + stream.get(buf); +} + +void printString(const std::string &str, std::ofstream &stream) { stream << "\"" << str << "\""; } + +void printBool(bool quant, std::ofstream &stream) { stream << "\"" << quant << "\""; } + +void JSON::readOperation(const std::string &backend, const std::string &operation, bool quant, + std::ifstream &stream) +{ + uint32_t size = 0; + int64_t time = 0; + + std::string int_buf; + char buf; + int number_of_closed_braces = 0; + int 
number_of_commas = 0; + + while (stream.good()) + { + stream.get(buf); + + switch (buf) + { + case ']': + { + number_of_closed_braces++; + break; + } + case '[': + { + number_of_closed_braces--; + break; + } + default: + { + if (std::isdigit(buf)) + { + int_buf.push_back(buf); + } + break; + } + } + + if (number_of_closed_braces == 1) + break; + + if ((buf == ']' && number_of_closed_braces == 0) || + (buf == ',' && number_of_closed_braces == -1)) + { + switch (number_of_commas % 2) + { + case 0: + { + size = static_cast<uint32_t>(std::atoi(int_buf.c_str())); + break; + } + case 1: + { + time = static_cast<int64_t>(std::atol(int_buf.c_str())); + auto bf = _backends.find(backend); + if (bf != _backends.end()) + { + _measurements[bf->second][operation][quant][size] = time; + } // we ignore the records for unsupported backends + break; + } + } + number_of_commas++; + int_buf.clear(); + } + } +} +void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info, + std::ofstream &stream) const +{ + for (const auto &items : operation_info) + { + stream << "[" << items.first << ", " << items.second << "], "; + } + stream.seekp(-2, std::ofstream::end); +} + +void JSON::uploadOperationsExecTime() const +{ + std::ofstream stream(_measurement_file); + if (!stream.is_open()) + { + throw std::runtime_error("Failed to save backend config file"); + } + else + { + stream << "{"; + for (const auto &backend : _measurements) + { + printString(backend.first->config()->id(), stream); + stream << ": {"; + for (const auto &operation : backend.second) + { + printString(operation.first, stream); + stream << ": {"; + for (const auto &type : operation.second) + { + printBool(type.first, stream); + stream << ": ["; + printOperation(type.second, stream); + stream << "], "; + } + stream.seekp(-2, std::ofstream::end); + stream << "}, "; + } + stream.seekp(-2, std::ofstream::end); + stream << "}, "; + } + stream.seekp(-2, std::ofstream::end); + stream << "}"; + stream.close(); + } +} + 
+void JSON::loadOperationsExecTime() +{ + std::ifstream stream(_measurement_file); + if (stream.is_open()) + { + std::string backend; + std::string operation; + bool quant = false; + char buf; + int number_of_open_braces = 0; + + while (stream.good()) + { + stream.get(buf); + switch (buf) + { + case '{': + number_of_open_braces++; + break; + case '}': + number_of_open_braces--; + break; + case '"': + { + if (number_of_open_braces == 1) + { + // read backend string + readString(backend, stream); + } + if (number_of_open_braces == 2) + { + // read operation string + readString(operation, stream); + } + if (number_of_open_braces == 3) + { + // read operation string + readBool(quant, stream); + } + break; + } + case '[': + { + // reading and creating all info for operation + readOperation(backend, operation, quant, stream); + break; + } + default: + break; + } + } + stream.close(); + } +} + +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/core/src/compiler/BackendResolver.cc b/runtime/neurun/core/src/compiler/BackendResolver.cc new file mode 100644 index 000000000..0c544190c --- /dev/null +++ b/runtime/neurun/core/src/compiler/BackendResolver.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendResolver.h" + +namespace neurun +{ +namespace compiler +{ + +BackendResolver::BackendResolver(const BackendResolver &obj) + : _context_manager{}, _gen_map{obj._gen_map} +{ + for (const auto &e : obj._context_manager) + { + _context_manager.emplace(e.first, nnfw::cpp14::make_unique<backend::BackendContext>(*e.second)); + } +} + +BackendResolver &BackendResolver::operator=(const BackendResolver &obj) +{ + _gen_map = obj._gen_map; + + _context_manager.clear(); + for (const auto &e : obj._context_manager) + { + _context_manager.emplace(e.first, nnfw::cpp14::make_unique<backend::BackendContext>(*e.second)); + } + + return *this; +} + +} // namespace compiler +} // namespace neurun diff --git a/runtime/neurun/core/src/compiler/BackendResolver.h b/runtime/neurun/core/src/compiler/BackendResolver.h new file mode 100644 index 000000000..ddcae793a --- /dev/null +++ b/runtime/neurun/core/src/compiler/BackendResolver.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_COMPILER_BACKEND_RESOLVER_H__ +#define __NEURUN_COMPILER_BACKEND_RESOLVER_H__ + +#include <unordered_map> +#include <typeindex> + +#include "util/logging.h" +#include "backend/Backend.h" +#include "backend/BackendManager.h" +#include "backend/ITensorBuilder.h" +#include "ir/OperationIndexMap.h" + +namespace neurun +{ +namespace compiler +{ + +class BackendResolver +{ +public: + BackendResolver(const ir::Operands &operands, + const std::vector<const backend::Backend *> &backends, + const std::shared_ptr<backend::custom::IKernelBuilder> &kb) + { + for (const auto backend : backends) + { + _context_manager.emplace(backend, backend->newContext(operands, kb)); + } + } + + ~BackendResolver() = default; + BackendResolver(const BackendResolver &obj); + BackendResolver(BackendResolver &&obj) = default; + BackendResolver &operator=(const BackendResolver &obj); + BackendResolver &operator=(BackendResolver &&obj) = default; + +public: + const backend::BackendContext *getBackendContext(const ir::OperationIndex &index) const + { + return _context_manager.at(_gen_map.at(index)).get(); + } + + const backend::BackendContext *getBackendContext(const backend::Backend *backend) const + { + return _context_manager.at(backend).get(); + } + + backend::TensorBuilderSet tensor_builders() const + { + backend::TensorBuilderSet ret; + for (const auto &e : _context_manager) + { + ret.insert(e.second->tensor_builder); + } + return ret; + } + + const backend::Backend *getBackend(const ir::OperationIndex &index) const + { + return getBackendContext(index)->backend; + } + + void setBackend(const ir::OperationIndex &index, const backend::Backend *backend) + { + _gen_map[index] = backend; + } + + void iterate(const std::function<void(const ir::OperationIndex &, + const backend::BackendContext &)> &fn) const + { + for (const auto &e : _gen_map) + { + fn(e.first, *_context_manager.at(e.second)); + } + } + +private: + std::unordered_map<const backend::Backend *, 
std::unique_ptr<backend::BackendContext>> + _context_manager; + ir::OperationIndexMap<const backend::Backend *> _gen_map; +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_COMPILER_BACKEND_RESOLVER_H__ diff --git a/runtime/neurun/core/src/compiler/CodeWithInfo.h b/runtime/neurun/core/src/compiler/CodeWithInfo.h new file mode 100644 index 000000000..73dd1055b --- /dev/null +++ b/runtime/neurun/core/src/compiler/CodeWithInfo.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_COMPILER_CODE_WITH_INFO_H__ +#define __NEURUN_COMPILER_CODE_WITH_INFO_H__ + +#include <memory> + +#include "compiler/Linear.h" +#include "exec/IFunction.h" + +namespace neurun +{ +namespace compiler +{ + +struct CodeWithInfo +{ + Linear::Element elem; + std::unique_ptr<exec::IFunction> fn; + + CodeWithInfo(const Linear::Element &elem, std::unique_ptr<exec::IFunction> &&fn) + : elem{elem}, fn{std::move(fn)} + { + } +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_COMPILER_CODE_WITH_INFO_H__ diff --git a/runtime/neurun/core/src/compiler/Compiler.cc b/runtime/neurun/core/src/compiler/Compiler.cc new file mode 100644 index 000000000..d5ee39d85 --- /dev/null +++ b/runtime/neurun/core/src/compiler/Compiler.cc @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compiler/Compiler.h" + +#include "BackendResolver.h" +#include "ParamChecker.h" +#include "ExecutorFactory.h" +#include "OperationValidator.h" + +#include "compiler/IScheduler.h" +#include "compiler/ManualScheduler.h" +#include "compiler/HEScheduler.h" +#include "backend/ExecTime.h" +#include "ir/operation/LowerInfo.h" +#include "dumper/dot/DotDumper.h" +#include "compiler/Linear.h" +#include "exec/interp/ExecManager.h" +#include "backend/ExecTime.h" +#include "util/ConfigSource.h" + +namespace neurun +{ + +namespace compiler +{ + +static void checkProfilerConditions() +{ + if (!util::getConfigBool(util::config::USE_SCHEDULER)) + throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling."); + + if (util::getConfigString(util::config::EXECUTOR) != "Dataflow") + throw std::runtime_error("Profiling mode works only with 'Dataflow' executor"); +} + +void Compiler::compile(void) +{ + _state = State::STARTED; + + /*************************************************** + * Prepare compilation phase + ***************************************************/ + + // Operation validation check + OperationValidator{*_graph}(); + + // Compilable check + if (!checkCompilable()) + { + _executor = std::make_shared<exec::interp::ExecManager>(*_graph); + return; + } + + // Mode check + if (util::getConfigBool(util::config::PROFILING_MODE)) + checkProfilerConditions(); + + 
/*************************************************** + * Backend independent analysis & optimization phase + ***************************************************/ + // Schedule + std::unique_ptr<BackendResolver> br; + std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks; + if (util::getConfigBool(util::config::USE_SCHEDULER)) + { + auto scheduler = compiler::HEScheduler( + _graph->operands(), backend::BackendManager::get().getAll(), _graph->getKernelBuilder()); + br = scheduler.schedule(*_graph); + indexed_ranks = scheduler.getIndexedRanks(); + } + else + { + auto scheduler = compiler::ManualScheduler(); + br = scheduler.schedule(*_graph); + } + _graph->setBackendResolver(std::move(br)); + /************************************************************* + * Backend independent analysis & optimization phase finished + *************************************************************/ + + // dump graph to .dot + auto dump_level = + static_cast<dumper::dot::DotDumper::Level>(util::getConfigInt(util::config::GRAPH_DOT_DUMP)); + neurun::dumper::dot::DotDumper dot_dumper(*_graph, dump_level); + dot_dumper.dump("before_lower"); + + // Lower: decide backend + _graph->lower(); + _state = State::LOWERED; + + dot_dumper.dump("after_lower"); + + const std::string executor_str = util::getConfigString(util::config::EXECUTOR); + + _executor = + std::shared_ptr<exec::IExecutor>{ExecutorFactory::get().create(executor_str, *_graph)}; + _executor->setIndexedRanks(indexed_ranks); + /******************************** + * Code generation phase finished + ********************************/ + _state = State::COMPILED; +} + +bool Compiler::checkCompilable() +{ + // Disable compile phase + // When ready to use interpreter backend, remove this config and use backend setting + const auto env_disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE); + if (env_disable_compile) + { + return false; + } + + // TODO check unspecified operand shape + + // Check compilable parameter + 
ParamChecker paramChecker{_graph}; + paramChecker(); + if (paramChecker.haveNoneConstParam()) + { + return false; + } + + return true; +} + +} // namespace compiler + +} // namespace neurun diff --git a/runtime/neurun/core/src/compiler/ExecutorFactory.cc b/runtime/neurun/core/src/compiler/ExecutorFactory.cc new file mode 100644 index 000000000..59de6c4a4 --- /dev/null +++ b/runtime/neurun/core/src/compiler/ExecutorFactory.cc @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#include "ExecutorFactory.h"

#include <functional>
#include "exec/ExecutionObservers.h"
#include "exec/LinearExecutor.h"
#include "exec/DataflowExecutor.h"
#include "exec/ParallelExecutor.h"
#include "compiler/BackendResolver.h"
#include "backend/ExecTime.h"
#include "compiler/Linear.h"
#include "ir/dumper/Dumper.h"
#include "SubTensorAnalyzer.h"
#include "backend/IConstantInitializer.h"
#include "backend/IKernelGenerator.h"
#include "backend/IShapeFixer.h"
#include "backend/ITensorRegister.h"
#include "cpp14/memory.h"
#include "CodeWithInfo.h"

namespace neurun
{
namespace compiler
{

// Meyers-singleton accessor for the factory.
ExecutorFactory &ExecutorFactory::get()
{
  static ExecutorFactory singleton;
  return singleton;
}

// Register the executor constructors under their config-string keys.
// "Dataflow" and "Parallel" share one creator, distinguished by a bool flag.
ExecutorFactory::ExecutorFactory()
{
  _map["Linear"] = createLinearExecutor;
  _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, false);
  _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, true);
}

// Create an executor by id ("Linear"/"Dataflow"/"Parallel").
// Throws std::out_of_range for an unknown id (std::unordered_map::at).
exec::IExecutor *ExecutorFactory::create(const std::string &id, ir::Graph &graph)
{
  return _map.at(id)(graph);
}

// Build a LinearExecutor: linearize the lowered graph, then run the
// backend-dependent codegen phases in order (shape fix, tensor planning,
// kernel generation, allocation, constant initialization).
exec::IExecutor *ExecutorFactory::createLinearExecutor(ir::Graph &graph)
{
  auto operand_context = std::make_shared<OperandContext>();

  // linearize
  assert(!graph.isBuildingPhase());
  auto linear = nnfw::cpp14::make_unique<Linear>(graph);

  // Dump ops
  linear->accept(ir::dumper::Dumper{});

  /*************************************************
   * Backend dependent analysis & optimization phase
   *************************************************/

  // SubTensorInfo should be generated after lower, before shape correction and finalize
  // because SubTensorAnalyzer assumes that permutation insertion is already finished
  //    lower: decide backend and insert permutation
  //    fix shapes: prepare codegen to optimization
  //    generate tensor objects: generate tensor using subtensor info
  //    generate kernels
  //    allocate tensor memory
  //    constant initialization: fill the constants with values
  // Generated SubTensorInfo is in operand(Object)
  // for easy pass SubTensorInfo to plan builder and tensor builder
  linear->accept(SubTensorAnalyzer{graph});

  /**********************************************************
   * Backend dependent analysis & optimization phase finished
   **********************************************************/

  /***********************
   * Code generation phase
   ***********************/

  // Fix shapes (per element, using the backend each op sequence was lowered to)
  linear->iterate([&](const compiler::Linear::Element &element) {
    auto backend = element.lower_info->backend();
    auto shape_fixer = graph.backend_resolver()->getBackendContext(backend)->shape_fixer;
    shape_fixer->fix(*element.op_seq);
  });

  linear->planTensors();

  auto tensor_builders = graph.backend_resolver()->tensor_builders();

  // Prepare tensors
  for (auto &tensor_builder : tensor_builders)
  {
    tensor_builder->prepare();
  }

  // Generate initializers
  linear->generateConstantInitializers();

  // Collects generated kernels together with the Linear element they belong to.
  class ExecutionBuilder final : public IExecutionBuilder
  {
  public:
    void append(std::unique_ptr<exec::IFunction> &&f) override
    {
      _code.emplace_back(_next_elem, std::move(f));
    }

    void setNextElem(const compiler::Linear::Element &next_elem) { _next_elem = next_elem; }
    std::vector<CodeWithInfo> releaseCode() { return std::move(_code); }

  private:
    compiler::Linear::Element _next_elem;
    std::vector<CodeWithInfo> _code;
  };

  ExecutionBuilder builder;

  // Generate kernels
  linear->iterate([&](const compiler::Linear::Element &element) {
    auto backend = element.lower_info->backend();
    builder.setNextElem(element);
    auto kernel_gen = graph.backend_resolver()->getBackendContext(backend)->kernel_gen;
    kernel_gen->generate(*element.op_seq, &builder);
  });

  auto code = builder.releaseCode();

  for (auto &tensor_builder : tensor_builders)
  {
    tensor_builder->allocateConsts();
  }

  // TODO Add optimization passes

  // Initialize constant tensors
  for (const auto backend : backend::BackendManager::get().getAll())
  {
    graph.backend_resolver()->getBackendContext(backend)->constant_initializer->run();
  }

  // Per-function prepare, then let each backend finish its per-function work
  for (auto &&e : code)
  {
    e.fn->prepare();
    auto backend = e.elem.lower_info->backend();
    auto tensor_builder = graph.backend_resolver()->getBackendContext(backend)->tensor_builder;
    tensor_builder->postFunctionPrepare();
  }

  for (auto &tensor_builder : tensor_builders)
  {
    tensor_builder->allocateNonconsts();
  }

  for (auto &tensor_builder : tensor_builders)
  {
    tensor_builder->finalize();
  }

  // Wrap tensors as Object and store them to plan
  for (auto &tensor_builder : tensor_builders)
  {
    tensor_builder->iterate([&](const ir::OperandIndex &index) {
      auto object = tensor_builder->tensorAt(index);
      operand_context->set(index, object);
    });
  }

  // Prepare each TensorManager on each backend
  auto tensor_mgrs = nnfw::cpp14::make_unique<backend::TensorManagerSet>();
  for (auto &tensor_builder : tensor_builders)
  {
    tensor_mgrs->insert(tensor_builder->releaseTensorManager());
  }

  auto exec =
      new exec::LinearExecutor{graph, operand_context, std::move(tensor_mgrs), std::move(code)};

  // Optionally attach a Chrome-trace observer when TRACE_FILEPATH is set
  const std::string trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
  if (!trace_filepath.empty())
  {
    std::unique_ptr<exec::IExecutionObserver> ctp =
        nnfw::cpp14::make_unique<exec::ChromeTracingObserver>(trace_filepath);
    exec->addObserver(std::move(ctp));
  }

  return exec;
}

// Build a DataflowExecutor (parallel == false) or ParallelExecutor
// (parallel == true). Works on the op-sequence subgraphs instead of a
// linearized order; otherwise follows the same phase ordering as
// createLinearExecutor.
exec::IExecutor *ExecutorFactory::createDataflowExecutor(ir::Graph &graph, bool parallel)
{
  auto operand_context = std::make_shared<OperandContext>();

  graph.subgraphs().iterate([&](const ir::SubgraphIndex &, const ir::OpSequence &subg) {
    auto subtensor_analyzer = SubTensorAnalyzer{graph};
    subg.accept(subtensor_analyzer);
  });

  // Fix shapes and register tensors
  graph.subgraphs().iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &subg) {
    auto backend = graph.getLowerInfo(subg_index)->backend();
    auto shape_fixer = graph.backend_resolver()->getBackendContext(backend)->shape_fixer;
    shape_fixer->fix(subg);
    const auto tensor_register =
        graph.backend_resolver()->getBackendContext(backend)->tensor_register;
    tensor_register->registerTensors(subg, graph.getLowerInfo());
  });

  // Register any operand not covered by an op sequence and pin tensors that
  // must never be deallocated
  graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
    const auto lower_info = graph.getLowerInfo(ind);
    for (auto factor : lower_info->def_factors())
    {
      auto backend = factor.backend();
      auto tensor_builder = graph.backend_resolver()->getBackendContext(backend)->tensor_builder;

      if (!tensor_builder->isRegistered(ind))
      {
        // These tensors do not exist in any op_seq (No use and def)
        // These tensors cannot be a SubTensor
        assert(obj.parent_info() == nullptr);

        const auto info = obj.info();
        const auto backend_layout = lower_info->def_factors().getOnlyElement().layout();
        // TODO Change tensor info to have permuted shape
        tensor_builder->registerTensorInfo(ind, info, backend_layout, obj.isConstant());
      }

      // Is not SubTensor?
      if (!backend->config()->SupportSubTensorAlloc() || obj.parent_info() == nullptr)
      {
        // To make this never be deallocated, this is a workaround to use static memory planner
        tensor_builder->notifyFirstUse(ind);
      }
    }
  });

  auto tensor_builders = graph.backend_resolver()->tensor_builders();

  for (auto &tensor_builder : tensor_builders)
  {
    tensor_builder->prepare();
  }

  // Collects generated kernels keyed by the subgraph index they belong to.
  class ExecutionBuilder : public IExecutionBuilder
  {
  public:
    void append(std::unique_ptr<exec::IFunction> &&fn) override
    {
      auto itr = _code_map.find(_next_index);
      if (itr == _code_map.end())
      {
        _code_map[_next_index] = nnfw::cpp14::make_unique<exec::FunctionSequence>();
      }
      _code_map[_next_index]->append(std::move(fn));
    };

    // TODO Remove this method and make `append` to get index value as an argument
    void setNextIndex(const ir::SubgraphIndex next_index) { _next_index = next_index; }

    exec::DataflowExecutor::CodeMap &&releaseCodeMap() { return std::move(_code_map); }

  private:
    ir::SubgraphIndex _next_index;
    exec::DataflowExecutor::CodeMap _code_map;
  };

  auto execution_builder = nnfw::cpp14::make_unique<ExecutionBuilder>();

  // Generate kernels
  graph.subgraphs().iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &subg) {
    auto backend = graph.getLowerInfo(subg_index)->backend();
    auto constant_initializer =
        graph.backend_resolver()->getBackendContext(backend)->constant_initializer;
    constant_initializer->generate(subg, graph.operands());
    // TODO This approach is temporary. See declaration of `setNextIndex`.
    execution_builder->setNextIndex(subg_index);
    auto kernel_gen = graph.backend_resolver()->getBackendContext(backend)->kernel_gen;
    kernel_gen->generate(subg, execution_builder.get());
  });

  for (const auto &tensor_builder : tensor_builders)
  {
    tensor_builder->allocateConsts();
  }

  // Initialize constant tensors
  for (const auto backend : backend::BackendManager::get().getAll())
  {
    graph.backend_resolver()->getBackendContext(backend)->constant_initializer->run();
  }

  exec::DataflowExecutor::CodeMap code_map = execution_builder->releaseCodeMap();

  for (auto &it : code_map)
  {
    auto subg_index = it.first;
    auto &function_sequence = *(it.second);

    function_sequence.iterate([&](exec::IFunction &ifunc) {
      // NOTE. It may need avoiding prepare() for some operations
      // Ref: https://github.sec.samsung.net/STAR/nnfw/issues/7326
      ifunc.prepare();
      auto backend = graph.getLowerInfo(subg_index)->backend();
      auto tensor_builder = graph.backend_resolver()->getBackendContext(backend)->tensor_builder;
      tensor_builder->postFunctionPrepare();
    });
  }

  for (const auto &tensor_builder : tensor_builders)
  {
    tensor_builder->allocateNonconsts();
  }

  for (auto &tensor_builder : tensor_builders)
  {
    tensor_builder->finalize();
  }

  // Wrap tensors as Object and store them to plan
  for (auto &tensor_builder : tensor_builders)
  {
    tensor_builder->iterate([&](const ir::OperandIndex &index) {
      auto object = tensor_builder->tensorAt(index);
      operand_context->set(index, object);
    });
  }

  // Prepare each TensorManager on each backend
  auto tensor_mgrs = nnfw::cpp14::make_unique<backend::TensorManagerSet>();
  for (auto &tensor_builder : tensor_builders)
  {
    tensor_mgrs->insert(tensor_builder->releaseTensorManager());
  }

  exec::ExecutorBase *exec = nullptr;
  if (parallel)
  {
    exec = new exec::ParallelExecutor{graph, operand_context, std::move(tensor_mgrs),
                                      std::move(code_map)};
  }
  else
  {
    exec = new exec::DataflowExecutor{graph, operand_context, std::move(tensor_mgrs),
                                      std::move(code_map)};
    // Only the (sequential) dataflow executor supports profiling observation
    if (util::getConfigBool(util::config::PROFILING_MODE))
    {
      auto et = std::make_shared<backend::ExecTime>(backend::BackendManager::get().getAll());
      std::unique_ptr<exec::IExecutionObserver> obs =
          nnfw::cpp14::make_unique<exec::ProfileObserver>(et);
      exec->addObserver(std::move(obs));
    }
  }

  // Optionally attach a Chrome-trace observer when TRACE_FILEPATH is set
  const std::string trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
  if (!trace_filepath.empty())
  {
    std::unique_ptr<exec::IExecutionObserver> ctp =
        nnfw::cpp14::make_unique<exec::ChromeTracingObserver>(trace_filepath);
    exec->addObserver(std::move(ctp));
  }

  return exec;
}

} // namespace compiler
} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/ExecutorFactory.h b/runtime/neurun/core/src/compiler/ExecutorFactory.h
new file mode 100644
index 000000000..6da4ffc5e
--- /dev/null
+++ b/runtime/neurun/core/src/compiler/ExecutorFactory.h
@@ -0,0 +1,52 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#ifndef __NEURUN_COMPILER_EXECUTOR_FACTORY_H__ +#define __NEURUN_COMPILER_EXECUTOR_FACTORY_H__ + +#include <unordered_map> + +#include "exec/IExecutor.h" +#include "ir/Graph.h" + +namespace neurun +{ +namespace compiler +{ + +class ExecutorFactory +{ +public: + static ExecutorFactory &get(); + +public: + exec::IExecutor *create(const std::string &id, ir::Graph &graph); + +private: + ExecutorFactory(); + +private: + static exec::IExecutor *createLinearExecutor(ir::Graph &graph); + static exec::IExecutor *createDataflowExecutor(ir::Graph &graph, bool parallel); + +private: + std::unordered_map<std::string, std::function<exec::IExecutor *(ir::Graph &)>> _map; +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_COMPILER_EXECUTOR_FACTORY_H__ diff --git a/runtime/neurun/core/src/compiler/HEScheduler.cc b/runtime/neurun/core/src/compiler/HEScheduler.cc new file mode 100644 index 000000000..aec68d655 --- /dev/null +++ b/runtime/neurun/core/src/compiler/HEScheduler.cc @@ -0,0 +1,628 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#include "ir/Operand.h"
#include "compiler/HEScheduler.h"
#include "ir/Graph.h"
#include "util/ConfigSource.h"
#include "compiler/IExecutionBuilder.h"
#include "compiler/BackendResolver.h"
#include "backend/IShapeFixer.h"
#include "util/logging.h"
#include "util/Utils.h"
#include "exec/FunctionSequence.h"
#include <cassert>
#include <cmath>
#include <chrono>

namespace neurun
{

namespace compiler
{
// Sum of the sizes (total_size, in bytes) of all input and output operands of `node`.
static uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node)
{
  uint32_t size = 0;
  for (const auto &input : node.getInputs())
  {
    size += graph.operands().at(input).info().total_size();
  }
  for (const auto &output : node.getOutputs())
  {
    size += graph.operands().at(output).info().total_size();
  }
  return size;
}

// True when any input operand of `node` has type QUANT8_ASYMM.
static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
{
  for (const auto &input : node.getInputs())
  {
    const auto &obj = graph.operands().at(input);
    if (obj.typeInfo().type() == ir::DataType::QUANT8_ASYMM)
    {
      return true;
    }
  }
  return false;
}

// Returns true when `node` must not be scheduled onto `backend` because of
// known backend limitations (broadcasting on cpu / acl_neon). See TODOs below.
static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
                             const ir::Operation &node, bool quant)
{
  /* TODO: this is workaround, come up with better solution if have.
     Adding exception in stage doesn't help. Because if there is a record for add without
     broadcast, scheduling will select it since it doesn't distinguish broadcast and
     non-broadcast like it does for quantized and non-quantized cases */
  if (backend->config()->id() == "cpu" &&
      (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
       node.opcode() == ir::OpCode::Mul))
  {
    const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
    const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
    /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
     * without broadcasting*/
    if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
    {
      return true;
    }
  }
  /* TODO: this is workaround, come up with better solution if have.
     Adding exception in stage doesn't help. Because if there is a record for Mul without
     broadcast, scheduling will select it since it doesn't distinguish broadcast and
     non-broadcast like it does for quantized and non-quantized cases */
  else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
  {
    const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
    const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};

    // Nontrivial broadcasting isn't supported yet
    if (quant ||
        !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
    {
      return true;
    }
  }
  return false;
}

// if a node can be merged into op_seq
static bool isMergeable(const ir::Graph &graph, const ir::Operation &node)
{
  size_t prev_op_cnt = 0;
  for (const auto &input : node.getInputs())
  {
    // only valid_inputs
    const auto &operand = graph.operands().at(input);
    if (operand.isConstant())
      continue;

    // This operand is output of operation, not weight or bias
    if (operand.getDef().list().size() > 0)
      ++prev_op_cnt;

    // Current node has multiple inputs as concat or at the beginning of the separated branch
    if (prev_op_cnt > 1 || operand.getUses().list().size() > 1)
    {
      return false;
    }
  }
  return true;
}

// Round-robin assign each ranked operation to the next backend that has a
// measured exec-time record for it. Used only in profiling mode once every
// backend/node pair has been profiled, to also collect data-transfer timings.
void HEScheduler::scheduleShufflingBackends()
{
  VERBOSE(HEScheduler::schedule)
      << "Started task scheduling: uses all backends to get more metrics for data transfer"
      << std::endl;
  size_t backend_ind = 0;
  for (const auto &rank : _rank_to_op)
  {
    VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
    const auto &node = _graph->operations().at(rank.second);
    const bool quant = isQuant(*_graph, node);
    const auto size = getOperationsFlattenedIOSize(*_graph, node);
    for (size_t i = 0;; ++i)
    {
      if (i == _all_backends.size())
      {
        // wasn't able to find backend
        assert(false);
        break;
      }
      if (backend_ind == _all_backends.size())
      {
        backend_ind = 0;
      }
      if (isWorkaroundSkip(*_graph, _all_backends[backend_ind], node, quant))
      {
        ++backend_ind;
        continue;
      }
      const auto exec_time =
          _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
      // Scheduling to measure data transfer must be done after measuring all backends separately
      assert(exec_time != _exec_time->NOT_FOUND);
      if (exec_time == _exec_time->getMax())
      {
        ++backend_ind;
        continue;
      }
      _backend_resolver->setBackend(rank.second, _all_backends[backend_ind]);
      VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
                                     << _all_backends[backend_ind]->config()->id() << std::endl;
      ++backend_ind;
      break;
    }
  }
}

// True when an exec-time record exists for `node` on every known backend.
bool HEScheduler::isNodeProfiled(const ir::Operation &node)
{
  const bool quant = isQuant(*_graph, node);
  const auto size = getOperationsFlattenedIOSize(*_graph, node);
  for (const auto *backend : _all_backends)
  {
    const auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
    if (exec_time == _exec_time->NOT_FOUND)
      return false;
  }
  return true;
}

// Schedule a chain of mergeable operations starting at `index`, trying to
// keep the whole chain on the backend of the first scheduled node
// (`parent_backend`). Stops at branch points or already-scheduled nodes.
void HEScheduler::scheduleBranch(const ir::OperationIndex &index,
                                 ir::OperationIndexMap<bool> &scheduled)
{
  auto loc_index = index;
  const backend::Backend *parent_backend = nullptr;
  while (true)
  {
    if (scheduled[loc_index])
    {
      return;
    }
    if (!schedule(loc_index, parent_backend))
    {
      return;
    }
    scheduled[loc_index] = true;
    parent_backend = _backend_resolver->getBackend(loc_index);

    const auto &node = _graph->operations().at(loc_index);
    /* get the only output operand, that is input of the next single operation
     * and just this nodes output.*/
    if (node.getOutputs().size() != 1)
    {
      return;
    }
    const auto &only_out_operand = _graph->operands().at(*node.getOutputs().begin());
    loc_index = only_out_operand.getUses().list().front();
    /* verify, that next node is neither beginning nor ending node of a branch*/
    const auto &next_node = _graph->operations().at(loc_index);
    if (!isMergeable(*_graph, next_node))
    {
      return;
    }
  }
}

// Entry point: HEFT-style list scheduling. Ranks all operations, then (in
// profiling mode with full profiles) shuffles backends to collect transfer
// metrics, otherwise schedules branch by branch in rank order.
// Returns (moves out) the backend resolver holding the final assignment.
std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph &graph)
{
  _graph = &graph;
  VERBOSE(HEScheduler::schedule) << "task scheduling started" << std::endl;
  // Make ranks and save in descending order
  makeRank();

  for (const auto *backend : _all_backends)
  {
    _backends_avail_time.emplace(backend, std::map<int64_t, int64_t>{{0, 0}});
  }

  const bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
  if (is_profiling)
  {
    // Check if profiling info about all backend/node pairs already exists
    bool all_nodes_are_profiled = true;
    _graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
      if (all_nodes_are_profiled)
        all_nodes_are_profiled = isNodeProfiled(op);
    });

    // If all nodes are already profiled - schedule backends in such order, so more profiling
    // information about between-backends data transfer could be collected
    if (all_nodes_are_profiled)
    {
      scheduleShufflingBackends();
      VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
      return std::move(_backend_resolver);
    }
  }

  ir::OperationIndexMap<bool> visited;
  graph.operations().iterate(
      [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
  // for each task select the backend with the smallest earliest finishing time(eft)
  for (const auto &rank : _rank_to_op)
  {
    scheduleBranch(rank.second, visited);
  }
  VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
  return std::move(_backend_resolver);
}

// Measured exec time for (backend, operation), or 1 if supported-but-unmeasured,
// or the "max" sentinel when unsupported.
int64_t HEScheduler::getOpTime(const backend::Backend *backend, const std::string &operation,
                               bool quant, uint32_t size)
{
  const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
  if (time != _exec_time->NOT_FOUND)
    return time;

  return _is_supported.at(backend).at(operation) ? 1 : _exec_time->getMax();
}

// Measured permute (data-transfer) time between two backends; falls back to a
// size-proportional estimate that biases toward staying on one backend.
int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend,
                                    const backend::Backend *dst_backend, bool quant, uint32_t size)
{
  const auto time = _exec_time->getPermuteTime(src_backend, dst_backend, quant, size);
  if (time != _exec_time->NOT_FOUND)
    return time;

  // Makes the scheduler prefer keeping computations on one backend
  return size / 200;
}

// Probe whether `backend` supports `node` (by running its shape fixer) and
// cache the result in _is_supported. Returns 1 when supported, max otherwise.
int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend)
{
  // if there is no profiling info don't use this backend during scheduling
  if (!util::getConfigBool(util::config::PROFILING_MODE))
  {
    VERBOSE(HEScheduler::tryBackend)
        << "Trying to HE schedule while there is no profiling info for " << node.name()
        << " on backend " << backend->config()->id() << ". So this backend won't be used. "
        << std::endl;
    _is_supported[backend][node.name()] = false;
    return _exec_time->getMax();
  }
  auto iter = _is_supported.find(backend);
  if (iter != _is_supported.end())
  {
    auto it2 = iter->second.find(node.name());
    if (it2 != iter->second.end())
    {
      // cached answer
      return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
    }
  }
  try
  {
    node.accept(*_backend_resolver->getBackendContext(backend)->shape_fixer);

    _is_supported[backend][node.name()] = true;
  }
  catch (std::runtime_error &e)
  {
    _is_supported[backend][node.name()] = false;
  }
  return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
}

// Compute a priority rank for every operation via DFSMaxRank.
void HEScheduler::makeRank()
{
  VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;

  _graph->operations().iterate(
      [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });

  // Check that ranks are calculated for all operations(nodes)
  _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
    UNUSED_RELEASE(index);
    assert(_op_to_rank->find(index) != _op_to_rank->end());
  });
  VERBOSE(HEScheduler::makeRank) << "task prioritizing finished" << std::endl;
}

// Upward rank of `index`: average exec time across supporting backends,
// scaled by the standard deviation across backends, plus the max child rank.
// Memoized in _op_to_rank; also fills _rank_to_op (descending schedule order).
int64_t HEScheduler::DFSMaxRank(const ir::OperationIndex &index)
{
  auto op_to_rank_it = _op_to_rank->find(index);
  if (op_to_rank_it != _op_to_rank->end())
    return op_to_rank_it->second;

  const auto &node = _graph->operations().at(index);
  int64_t rank = 0;
  const bool quant = isQuant(*_graph, node);
  const auto size = getOperationsFlattenedIOSize(*_graph, node);
  auto supported_backends_quantity = static_cast<int64_t>(_all_backends.size());

  const auto max_child_rank = DFSChildrenMaxRank(index);

  // get average exec time of this op
  for (const auto &backend : _all_backends)
  {
    auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
    if (exec_time == _exec_time->NOT_FOUND)
    {
      exec_time = tryBackend(node, backend);
    }
    if (exec_time < _exec_time->getMax())
    {
      rank += exec_time;
    }
    else
    {
      // this operation isn't supported in this backend
      --supported_backends_quantity;
    }
  }
  if (supported_backends_quantity == 0)
  {
    throw std::runtime_error{"Encountered unsupported op: " + node.name()};
  }
  rank /= supported_backends_quantity;

  // get standard deviation
  int64_t std = 0;
  for (const auto backend : _all_backends)
  {
    const auto exec_time = getOpTime(backend, node.name(), quant, size);
    if (exec_time < _exec_time->getMax())
    {
      std += (exec_time - rank) * (exec_time - rank);
    }
  }
  std /= supported_backends_quantity;
  if (std > 0)
  {
    std = static_cast<int>(std::sqrt(std));
    rank *= std;
  }
  rank += max_child_rank;

  assert(rank >= 0);
  _rank_to_op.emplace(rank, index);
  _op_to_rank->emplace(index, rank);
  VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
                                   << " is " << rank << std::endl;

  return rank;
}

// Max over all consumer operations of (child rank + average data-transfer
// cost for the connecting operand across all backend pairs).
int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
{
  const auto &node = _graph->operations().at(index);
  int64_t max_child_rank = 0;
  for (const auto &output : node.getOutputs())
  {
    const auto &operand = _graph->operands().at(output);
    const bool quant = operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM;
    // average data transfer cost of this operand's data
    int64_t avg_transfer_cost = 1;
    for (const auto *backend : _all_backends)
    {
      for (const auto *other_backend : _all_backends)
      {
        if (backend == other_backend)
        {
          continue;
        }
        auto transfer_cost =
            _exec_time->getPermuteTime(backend, other_backend, quant, operand.info().total_size());
        if (transfer_cost == _exec_time->NOT_FOUND)
        {
          // Makes the scheduler prefer keeping computations on one backend
          transfer_cost = operand.info().total_size() / 100;
        }
        avg_transfer_cost += transfer_cost;
      }
    }
    avg_transfer_cost /= _all_backends.size();
    for (const auto &use : operand.getUses().list())
    {
      const auto cur_child_rank = DFSMaxRank(use);
      max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
    }
  }
  return max_child_rank;
}

// Earliest time >= starting_time at which `backend` has a gap of at least
// `time_amount` in its task set (_backends_avail_time: finish-time -> start-time).
int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
                                          const int64_t &starting_time,
                                          const int64_t &time_amount)
{
  // NOTE(review): this copies the whole availability map; `const auto &` looks
  // sufficient - confirm no caller relies on the copy.
  const auto backend_times = _backends_avail_time.at(backend);
  // finishing and starting times of an op, that will come after current op
  auto next_op_fst = backend_times.upper_bound(starting_time);
  // finishing time of an op, that will come before current op
  auto prev_op_ft = starting_time;
  // until reach the "hole/gap", that is enough to run this op
  while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft <= time_amount)
  {
    prev_op_ft = next_op_fst->first + 1;
    ++next_op_fst;
  }
  return prev_op_ft;
}

// Schedule one operation: pick the backend with the smallest earliest finish
// time (EFT). Returns false when the node belongs to a branch whose parent was
// assigned a different backend (so the caller stops extending the branch).
bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend)
{
  VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
  int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
  const auto &node = _graph->operations().at(index);

  std::multimap<int64_t, int64_t> selected_transfer_st_exec_time;
  // select the backend with the smallest eft of this task
  const backend::Backend *chosen_backend = nullptr;
  for (const auto *backend : _all_backends)
  {
    std::multimap<int64_t, int64_t> transfer_st_exec_time;
    const auto est_and_et = ESTAndExecTime(backend, index, transfer_st_exec_time);

    if (eft > est_and_et.first + est_and_et.second)
    {
      eft = est_and_et.first + est_and_et.second;
      selected_exec_time = est_and_et.second;
      chosen_backend = backend;
      selected_transfer_st_exec_time = transfer_st_exec_time;
    }
  }

  if (chosen_backend == nullptr)
  {
    throw std::runtime_error{"Fail to choose backend on scheduler"};
  }

  // this is part of a branch and it is assigned another backend
  if (parent_backend && parent_backend != chosen_backend)
  {
    return false;
  }
  // Commit the permutations required by the chosen backend into cpu's task set
  for (const auto &it : selected_transfer_st_exec_time)
  {
    auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
    _backends_avail_time[_cpu_backend].insert({prev_op_ft + it.second, prev_op_ft});
  }

  _ops_eft[index] = eft;
  _backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time);
  _backend_resolver->setBackend(index, chosen_backend);

  VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
                                 << chosen_backend->config()->id() << ". Its eft: " << eft
                                 << std::endl;
  return true;
}

// Estimate {earliest start time, exec time} of `index` on `backend`,
// accounting for required data transfers (returned via transfer_st_exec_time
// as start-time -> transfer-duration entries) and executor-specific penalties.
std::pair<int64_t, int64_t>
HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::OperationIndex &index,
                            std::multimap<int64_t, int64_t> &transfer_st_exec_time)
{
  const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
  const bool is_parallel_exec = "Parallel" == util::getConfigString(util::config::EXECUTOR);
  // Permutation will cause creating a separate op_seq that contains just this permutation node.
  // This isn't needed for Linear executor since it doesn't use subgraphs
  // Number 1 ms is picked experimentally
  int64_t permute_fine = 1000;
  // Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
  // permutation on other branches or non-nnfw specific tasks and have to wait for it.
  // Number 2 is picked experimentally
  const int64_t CPU_DELAY = 2;
  const auto &node = _graph->operations().at(index);
  const bool quant = isQuant(*_graph, node);
  const auto size = getOperationsFlattenedIOSize(*_graph, node);
  // if this node can be part of a op_seq, then assigning different backend will cause creating
  // another op_seq
  if (isMergeable(*_graph, node))
  {
    permute_fine *= 2;
  }
  if (isWorkaroundSkip(*_graph, backend, node, quant))
  {
    return {_exec_time->getMax(), _exec_time->getMax()};
  }
  // get average exec time of the op on this backend
  auto exec_time = getOpTime(backend, node.name(), quant, size);
  if (backend->config()->id() == "cpu" && is_parallel_exec)
  {
    exec_time *= CPU_DELAY;
  }

  // get max eft of direct (one level above) predecessors
  auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);

  int64_t total_transfer_cost = 0;
  std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
  // Find free time for data transferring and insert it into backend taskset. This is needed:
  // 1. Time for multiple permutations for this node's input is found correctly
  // 2. If backend==cpu, then free time for this node must come after permutations
  for (auto &it : transfer_st_exec_time)
  {
    if (is_parallel_exec)
    {
      it.second *= CPU_DELAY;
    }
    if (!is_linear_exec)
    {
      it.second += permute_fine;
    }
    total_transfer_cost += it.second;

    const auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);

    max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);

    const auto tmp = _backends_avail_time[_cpu_backend].emplace(prev_op_ft + it.second, prev_op_ft);
    inserted_permutations.push_back(tmp.first);
  }
  // find the hole/gap, where this op can be put or the finishing time of the last assigned op
  auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);

  // Remove inserted permutation from cpu's task set (this was only a trial placement)
  for (const auto &it : inserted_permutations)
  {
    _backends_avail_time[_cpu_backend].erase(it);
  }

  /* In case non-parallel executor measure just exec time and data transfer time
   * because EFT(prev_op_ft) is the same for all backends. Since two operations
   * can't be run simultaneously, finish of running operation must be waited for.
   * When an operation starts, all backends are free. So, they need time just for
   * data transfer.*/
  if (!is_parallel_exec)
  {
    VERBOSE(HEScheduler::ESTAndExecTime)
        << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
        << backend->config()->id() << " is " << exec_time
        << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;

    return {total_transfer_cost, exec_time};
  }
  VERBOSE(HEScheduler::ESTAndExecTime)
      << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
      << backend->config()->id() << ": " << exec_time
      << " microseconds. Backend available time: " << prev_op_ft
      << " Parent's max eft: " << max_pred_eft - total_transfer_cost
      << " data transfer cost: " << total_transfer_cost << std::endl;

  return {prev_op_ft, exec_time};
}

// Max EFT among direct predecessors of `node`; for predecessors on a
// different backend, record the needed transfer as (predecessor EFT -> cost)
// in transfer_st_exec_time.
int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
                                std::multimap<int64_t, int64_t> &transfer_st_exec_time)
{
  int64_t max_pred_eft = 0;
  for (const auto &input_operand_idx : node.getInputs())
  {
    const auto &input_operand = _graph->operands().at(input_operand_idx);
    const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM;

    for (const auto &input_node_idx : input_operand.getDef().list())
    {
      // Data transfer cost from parent's node backend to current node's backend:
      auto parent_backend = _backend_resolver->getBackend(input_node_idx);

      max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx));
      if (parent_backend != backend)
      {
        // Multiply operand size by 2 because size must describe input+output size
        int64_t transfer_cost =
            getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
        transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
      }
    }
  }
  return max_pred_eft;
}

} // namespace compiler

} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/HEScheduler.h b/runtime/neurun/core/src/compiler/HEScheduler.h
new file mode 100644
index 000000000..538427065
--- /dev/null
+++ b/runtime/neurun/core/src/compiler/HEScheduler.h
@@ -0,0 +1,175 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file HEScheduler.h + * @brief This file contains HEScheduler class to define and run task Heterogeneous Execution + * Scheduler + */ + +#ifndef __NEURUN_COMPILER_H_E_SCHEDULER_H_ +#define __NEURUN_COMPILER_H_E_SCHEDULER_H_ + +#include "compiler/IScheduler.h" +#include "ir/Graph.h" +#include "backend/ExecTime.h" +#include "backend/Backend.h" +#include "cpp14/memory.h" +#include "ir/OperationIndexMap.h" +#include <map> +#include <memory> + +namespace neurun +{ + +namespace compiler +{ +/** + * @brief Class to schedule tasks + */ +class HEScheduler : IScheduler +{ +public: + /** + * @brief Construct a new Heterogeneous Execution Scheduler object + * @param[in] model Graph model + * @param[in] backend_resolver backend resolver + */ + HEScheduler(const ir::Operands &operands, std::vector<const backend::Backend *> backends, + const std::shared_ptr<backend::custom::IKernelBuilder> &kb) + : _is_supported{}, _backends_avail_time{}, _ops_eft{}, + _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()}, + _all_backends(std::move(backends)) + { + _backend_resolver = + nnfw::cpp14::make_unique<compiler::BackendResolver>(operands, _all_backends, kb); + _exec_time = nnfw::cpp14::make_unique<backend::ExecTime>(_all_backends); + + // Find cpu backend + auto cpu_backend_it = std::find_if( + _all_backends.begin(), _all_backends.end(), + [](const backend::Backend *backend) { return backend->config()->id() == "cpu"; }); + if (cpu_backend_it == _all_backends.end()) + throw std::runtime_error("HEScheduler could be used only if 'cpu' backend is 
available"); + _cpu_backend = *cpu_backend_it; + } + +public: + /** + * @brief Task scheduling + * + * @note The main idea is taken from HSIP algo: + * https://www.hindawi.com/journals/sp/2016/3676149/ + */ + std::unique_ptr<compiler::BackendResolver> schedule(const ir::Graph &graph) final; + std::shared_ptr<ir::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; } + +private: + bool isNodeProfiled(const ir::Operation &); + + bool schedule(const ir::OperationIndex &, const backend::Backend *parent_backend); + /** + * @brief Get earliest starting time and execution time of an operation on a backend. + * + * @note Returns a time when operation's inputs are ready and backend is available + * It also returns exec time. If this is "cpu" backend, then exec_time*CPU_DELAY + * + * @param[in] backend: backend, for which to return the time + * @param[in] index: index of an operation + * @param[out] transfer_st_exec_time: est and exec time of data transfer operation + * + * @return earliest starting time and execution time + */ + std::pair<int64_t, int64_t> + ESTAndExecTime(const backend::Backend *backend, const ir::OperationIndex &index, + std::multimap<int64_t, int64_t> &transfer_st_exec_time); + /** + * @brief Returns the latest finishing time of parents of a node. + * + * @param[in] backend: backend, for which to return the time + * @param[in] node: node to get eft of parents + * @param[out] transfer_st_exec_time: est and exec time of data transfer operation + * + * @return earliest finishing time of parent nodes + */ + int64_t predMaxEFT(const backend::Backend *backend, const ir::Operation &node, + std::multimap<int64_t, int64_t> &transfer_st_exec_time); + + void makeRank(); + + int64_t DFSMaxRank(const ir::OperationIndex &index); + + int64_t DFSChildrenMaxRank(const ir::OperationIndex &index); + /** + * @brief Returns the time, when backend is available for at least given amount of time. 
* @note Returns either the hole/gap between two already scheduled operations,
for subsequent nodes: a backend other than the predecessor's backend is preferred
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_CORE_COMPILER_I_SCHEDULER_H__ +#define __NEURUN_CORE_COMPILER_I_SCHEDULER_H__ + +#include "BackendResolver.h" +#include "ir/Graph.h" + +namespace neurun +{ +namespace compiler +{ + +struct IScheduler +{ + virtual ~IScheduler() = default; + + virtual std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) = 0; +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_CORE_COMPILER_I_SCHEDULER_H__ diff --git a/runtime/neurun/core/src/compiler/Linear.cc b/runtime/neurun/core/src/compiler/Linear.cc new file mode 100644 index 000000000..b18dcea02 --- /dev/null +++ b/runtime/neurun/core/src/compiler/Linear.cc @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <algorithm> + +#include "Linear.h" + +#include "backend/IShapeFixer.h" +#include "backend/IConfig.h" +#include "backend/IConstantInitializer.h" +#include "backend/ITensorRegister.h" +#include "backend/Backend.h" +#include "compiler/SubTensorInfo.h" + +#include "util/logging.h" + +namespace neurun +{ +namespace compiler +{ + +Linear::Linear(ir::Graph &graph) : _graph(graph) +{ + // Get SubgraphSequence by topological sorting + { + ir::Subgraphs &subgraphs = _graph.subgraphs(); + ir::Operands &operands = _graph.operands(); + // subgraphs can't access a op_seq by an operand so that input_to_subgs can offer it + std::unordered_map<ir::OperandIndex, std::list<ir::SubgraphIndex>> input_to_subgs; + + // Get the relations between input/op_seq to be used for dfs-post-iter + // + // [0] # input -> _input_to_op_seqes[0] = {SUBG0} + // | + // [SUBG0] + // | + // [1]-----. # input -> _input_to_op_seqes[1] = {SUBG1, SUBG2} + // | | + // [SUBG1] [SUBG2] + // | | + // [2] [3] # input -> _input_to_op_seqes[2] = {SUBG3} + // \ / # input -> _input_to_op_seqes[3] = {SUBG3} + // [SUBG3] + // | + // [4] + subgraphs.iterate([&](const ir::SubgraphIndex &subg_idx, ir::OpSequence &subg) { + for (auto input : subg.getInputs()) + { + // only valid_inputs + const auto &operand = operands.at(input); + if (operand.isConstant()) + continue; + + auto it = input_to_subgs.find(input); + if (it == input_to_subgs.end()) + { + std::list<ir::SubgraphIndex> list{subg_idx}; + input_to_subgs[input] = list; + } + else + { + it->second.push_back(subg_idx); + } + } + }); + + std::unordered_map<ir::SubgraphIndex, bool> visited; + subgraphs.iterate( + [&](const ir::SubgraphIndex &index, const ir::OpSequence &) { visited[index] = false; }); + + std::function<void(const ir::SubgraphIndex &, ir::OpSequence &)> dfs_recursive = + [&](const ir::SubgraphIndex &index, ir::OpSequence &subg) -> void { + if (visited[index]) + return; + visited[index] = true; + + // The outputs should be not constants + 
for (auto output : subg.getOutputs()) + { + const auto it = input_to_subgs.find(output); + if (it != input_to_subgs.end()) + { + const auto &subg_index_list = it->second; + for (const auto &index : subg_index_list) + { + auto &subg = subgraphs.at(index); + dfs_recursive(index, subg); + } + } + } + + _elements.emplace_back(&subgraphs.at(index), _graph.getLowerInfo(index)); + }; + + subgraphs.iterate(dfs_recursive); + + // All of the nodes must have been visited. + assert(std::all_of(visited.begin(), visited.end(), + [](const std::pair<const ir::SubgraphIndex, bool> &v) { return v.second; })); + + // NOTE. Now these op_seq are on the reverse order + std::reverse(_elements.begin(), _elements.end()); + } + + { + const auto &backendToString = [](const neurun::backend::Backend *backend) { + assert(backend); + std::string str; + str += backend->config()->id(); + str += " "; + return "{ " + str + "}"; + }; + + VERBOSE(Linear) << "Final SubgraphSequence" << std::endl; + for (const auto &element : _elements) + { + const auto subg = element.op_seq; + const auto lower_info = element.lower_info; + VERBOSE(Linear) << "* SUBG" + << " " << backendToString(lower_info->backend()) << " " << subg->getStr() + << std::endl; + } + } +} + +void Linear::accept(ir::OperationVisitor &&visitor) const +{ + for (const auto &e : _elements) + { + e.op_seq->accept(visitor); + } +} + +void Linear::planTensors() +{ + ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map; + + // NOTE + // While current ITensorBuilder exposes registerSubTensorInfo for subtensor, + // this stage uses registerSubTensorInfo() and notify{First|Last}Use() + // but handling subtensor should be processed on each backend. See #5726. 
+ ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + iterate([&](const neurun::compiler::Linear::Element &element) { + const auto backend = element.lower_info->backend(); + const auto tensor_register = + _graph.backend_resolver()->getBackendContext(backend)->tensor_register; + tensor_register->registerTensors(*element.op_seq, _graph.getLowerInfo()); + }); + + // Prepare scanning + _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + const auto lower_info = _graph.getLowerInfo(ind); + // TODO Remove if neurun doesn't support anymore such as + // GeneratedTests.reshape_quant8_weights_as_inputs + if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && + !_graph.getInputs().contains(ind)) + { + VERBOSE(LINEAR) << "Operand #" << ind.value() << " will be not used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().size(); // should be 1 or 0 + + bool is_const = obj.isConstant(); + if (is_const) + { + constants.append(ind); + } + + for (auto factor : lower_info->def_factors()) + { + auto backend = factor.backend(); + auto tensor_builder = _graph.backend_resolver()->getBackendContext(backend)->tensor_builder; + + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + // These tensors cannot be a SubTensor + assert(obj.parent_info() == nullptr); + + const auto info = obj.info(); + const auto backend_layout = lower_info->def_factors().getOnlyElement().layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout, is_const); + } + + tensor_builder_map[ind] = tensor_builder; + } + }); + + // If a tensor is model output, increase the use of the tensor. + // This aim is same to above one. 
+ for (const auto &ind : _graph.getOutputs()) + { + uses_map[ind]++; + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor. + // It makes the tensor not be dealloced. It means these will be deallocated last. + // And allocate constant operands first + VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder_map[ind]->notifyFirstUse(ind); + } + + // Allocate Model's inputs + VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl; + for (const auto &ind : _graph.getInputs()) + { + auto tensor_builder = tensor_builder_map[ind]; + if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs + continue; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 + VERBOSE(LINEAR) << "TENSORS" << std::endl; + for (const auto &e : _elements) + { + for (const auto &op : e.op_seq->operations()) + { + for (const auto &ind : op.node->getOutputs()) + { + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder_map[ind]->notifyFirstUse(ind); + } + } + + for (const auto &ind : op.node->getInputs()) + { + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + tensor_builder_map[ind]->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : _graph.getOutputs()) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder_map[ind]->notifyLastUse(ind); + } + } + + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder_map[ind]->notifyLastUse(ind); + } + } + + assert( + 
std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); +} + +void Linear::iterate(const std::function<void(const Element &element)> &fn) const +{ + for (const auto &e : _elements) + { + fn(e); + } +} + +void Linear::generateConstantInitializers(void) const +{ + iterate([&](const compiler::Linear::Element &element) { + auto backend = element.lower_info->backend(); + + auto constant_initializer = + _graph.backend_resolver()->getBackendContext(backend)->constant_initializer; + constant_initializer->generate(*element.op_seq, _graph.operands()); + }); +} + +} // namespace compiler +} // namespace neurun diff --git a/runtime/neurun/core/src/compiler/Linear.h b/runtime/neurun/core/src/compiler/Linear.h new file mode 100644 index 000000000..e10d03695 --- /dev/null +++ b/runtime/neurun/core/src/compiler/Linear.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_COMPILER_LINEAR_H__ +#define __NEURUN_COMPILER_LINEAR_H__ + +#include <vector> +#include <memory> + +#include "ir/Subgraphs.h" +#include "backend/ITensorBuilder.h" +#include "ir/Graph.h" +#include "compiler/BackendResolver.h" + +namespace neurun +{ +namespace ir +{ +struct OperationVisitor; +} // namespace ir +} // namespace neurun + +namespace neurun +{ +namespace compiler +{ + +class Linear +{ +public: + struct Element + { + const ir::OpSequence *op_seq; + const ir::operation::LowerInfo *lower_info; + + Element() : op_seq{nullptr}, lower_info{nullptr} {} + + Element(const ir::OpSequence *op_seq, const ir::operation::LowerInfo *lower_info) + : op_seq{op_seq}, lower_info{lower_info} + { + // DO NOTHING + } + }; + +public: + Linear(ir::Graph &graph); + +public: + Linear(const Linear &linear) = delete; + +public: + void accept(ir::OperationVisitor &&visitor) const; + + void planTensors(); + + void iterate(const std::function<void(const Element &element)> &fn) const; + + void generateConstantInitializers(void) const; + +private: + ir::Graph &_graph; + std::vector<Element> _elements; +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_COMPILER_LINEAR_H__ diff --git a/runtime/neurun/core/src/compiler/ManualScheduler.cc b/runtime/neurun/core/src/compiler/ManualScheduler.cc new file mode 100644 index 000000000..1fb9d3759 --- /dev/null +++ b/runtime/neurun/core/src/compiler/ManualScheduler.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ManualScheduler.h" +#include "ir/OpCode.h" +#include "ir/Operations.Include.h" +#include "backend/Backend.h" +#include "backend/BackendManager.h" +#include "backend/IConfig.h" +#include "util/ConfigSource.h" +#include "misc/string_helpers.h" + +namespace neurun +{ +namespace compiler +{ + +std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &graph) +{ + auto backend_resolver = nnfw::cpp14::make_unique<compiler::BackendResolver>( + graph.operands(), backend::BackendManager::get().getAll(), graph.getKernelBuilder()); + + // 1. Backend for All operations + auto backend_all_str = util::getConfigString(util::config::OP_BACKEND_ALLOPS); + backend::Backend *backend_all = nullptr; + + if (backend_all_str.empty()) + { + const auto backends_str = util::getConfigString(util::config::BACKENDS); + size_t prev_pos = 0; + auto pos = backends_str.find(";"); + while (pos != std::string::npos) + { + backend_all_str = backends_str.substr(prev_pos, pos - prev_pos); + backend_all = backend::BackendManager::get().get(backend_all_str); + if (backend_all != nullptr) + break; + + prev_pos = pos + 1; + pos = backends_str.find(";", prev_pos); + } + // if backends doesn't terminate with ";" + if (backend_all == nullptr && prev_pos < backends_str.size()) + { + backend_all_str = backends_str.substr(prev_pos); + backend_all = backend::BackendManager::get().get(backend_all_str); + } + } + else + { + backend_all = backend::BackendManager::get().get(backend_all_str); + } + + VERBOSE(ManualScheduler) << "Default backend for all ops: " << 
backend_all_str << std::endl; + + graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) { + backend_resolver->setBackend(index, backend_all); + }); + + // 2. Backend per operation type + std::unordered_map<ir::OpCode, backend::Backend *> op_type_map; + // By default, Custom uses cpu backend + op_type_map[ir::OpCode::Custom] = backend::BackendManager::get().get("cpu"); + +#define OP(InternalName) \ + { \ + const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##InternalName); \ + if (!backend_str.empty()) \ + { \ + auto backend = backend::BackendManager::get().get(backend_str); \ + VERBOSE(Lower) << "backend for " << #InternalName << ": " << backend_str << std::endl; \ + op_type_map[ir::OpCode::InternalName] = backend; \ + } \ + } +#include "ir/Operations.lst" +#undef OP + + graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &operation) { + auto itr = op_type_map.find(operation.opcode()); + if (itr != op_type_map.end()) + { + backend_resolver->setBackend(index, itr->second); + } + }); + + // 3. Backend per operation + try + { + auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP); + auto key_val_list = nnfw::misc::split(map_str, ';'); + for (const auto &key_val_str : key_val_list) + { + if (key_val_str.empty()) + { + continue; + } + + auto key_val = nnfw::misc::split(key_val_str, '='); + const auto &key_str = key_val.at(0); + const auto &val = key_val.at(1); + auto key = static_cast<uint32_t>(std::stoi(key_str)); + + graph.operations().at(ir::OperationIndex{key}); // Check if exist, or this wil throw + backend_resolver->setBackend(ir::OperationIndex{key}, + backend::BackendManager::get().get(val)); + } + } + catch (...) + { + VERBOSE(ManualScheduler) << "Invalid value from " << util::config::OP_BACKEND_MAP + << ". Some of the given values are ignored" << std::endl; + } + + // 4. 
Operations that are specially handled + // All configuration above will be ignored(overwritten) + op_type_map[ir::OpCode::Permute] = backend::BackendManager::get().get("cpu"); + + // Dump final assignment + backend_resolver->iterate( + [&](const ir::OperationIndex &index, const backend::BackendContext &backend_ctx) { + VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": " + << backend_ctx.backend->config()->id() << std::endl; + }); + + return backend_resolver; +} + +} // namespace compiler +} // namespace neurun diff --git a/runtime/neurun/core/src/compiler/ManualScheduler.h b/runtime/neurun/core/src/compiler/ManualScheduler.h new file mode 100644 index 000000000..a442cec08 --- /dev/null +++ b/runtime/neurun/core/src/compiler/ManualScheduler.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_CORE_COMPILER_MANUAL_SCHEDULER_H__ +#define __NEURUN_CORE_COMPILER_MANUAL_SCHEDULER_H__ + +#include "IScheduler.h" + +namespace neurun +{ +namespace compiler +{ + +class ManualScheduler : public IScheduler +{ +public: + std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) override; +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_CORE_COMPILER_MANUAL_SCHEDULER_H__ diff --git a/runtime/neurun/core/src/compiler/OperandContext.cc b/runtime/neurun/core/src/compiler/OperandContext.cc new file mode 100644 index 000000000..c06f6157b --- /dev/null +++ b/runtime/neurun/core/src/compiler/OperandContext.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "OperandContext.h" + +#include <cassert> + +namespace neurun +{ +namespace compiler +{ + +OperandContext &OperandContext::set(const ir::OperandIndex &id, + const std::shared_ptr<backend::operand::ITensor> &tensor) +{ + // Only one tensor for an id + assert(_tensors.find(id) == _tensors.end()); + _tensors[id] = tensor; + return (*this); +} + +void OperandContext::iterate( + const std::function<void(const ir::OperandIndex &, backend::operand::ITensor &)> &fn) +{ + for (auto &e : _tensors) + { + fn(e.first, *e.second); + } +} + +} // namespace compiler +} // namespace neurun diff --git a/runtime/neurun/core/src/compiler/OperandContext.h b/runtime/neurun/core/src/compiler/OperandContext.h new file mode 100644 index 000000000..da1a51bb9 --- /dev/null +++ b/runtime/neurun/core/src/compiler/OperandContext.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_COMPILER_OPERAND_CONTEXT_H__ +#define __NEURUN_COMPILER_OPERAND_CONTEXT_H__ + +#include "backend/operand/ITensor.h" +#include "ir/OperandIndexMap.h" +#include <unordered_map> +#include <memory> + +namespace neurun +{ +namespace compiler +{ + +class OperandContext +{ +public: + OperandContext &set(const ir::OperandIndex &ind, + const std::shared_ptr<backend::operand::ITensor> &tensor); + +public: + bool exist(const ir::OperandIndex &ind) const { return _tensors.find(ind) != _tensors.end(); } + +public: + std::shared_ptr<backend::operand::ITensor> at(const ir::OperandIndex &ind) const + { + return _tensors.at(ind); + } + + std::shared_ptr<backend::operand::ITensor> &at(const ir::OperandIndex &ind) + { + return _tensors.at(ind); + } + + void + iterate(const std::function<void(const ir::OperandIndex &, backend::operand::ITensor &)> &fn); + +private: + ir::OperandIndexMap<std::shared_ptr<backend::operand::ITensor>> _tensors; +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_COMPILER_OPERAND_CONTEXT_H__ diff --git a/runtime/neurun/core/src/compiler/OperationValidator.cc b/runtime/neurun/core/src/compiler/OperationValidator.cc new file mode 100644 index 000000000..74f76bdbe --- /dev/null +++ b/runtime/neurun/core/src/compiler/OperationValidator.cc @@ -0,0 +1,985 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "OperationValidator.h" + +#include <typeinfo> + +#include "ir/Graph.h" +#include "ir/operation/LowerInfo.h" + +#include "util/logging.h" +#include "util/Utils.h" + +namespace neurun +{ +namespace compiler +{ + +OperationValidator::OperationValidator(const ir::Graph &graph) + : _graph{graph}, _ctx{graph.operands()}, _current_subg_layout{ir::Layout::UNKNOWN} +{ +} + +void OperationValidator::operator()() +{ + // TODO Get frontend layout from graph + _current_subg_layout = ir::Layout::NHWC; + + _graph.operations().iterate( + [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); +} + +void OperationValidator::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; + const auto block_size_index{ + node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + + const auto frontend_layout = _current_subg_layout; + const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + + UNUSED_RELEASE(input_shape); + UNUSED_RELEASE(output_shape); + + // All assertions as per NNAPI specification. 
+ assert(_ctx.at(ifm_index).shape().rank() == 4); + assert(_ctx.at(ofm_index).shape().rank() == 4); + assert(_ctx.at(block_size_index).shape().rank() == 1); + + assert(_ctx.at(block_size_index).shape().dim(0) == 2); + + assert(_ctx.at(block_size_index).isConstant()); + + assert(input_shape.C == output_shape.C); +} + +void OperationValidator::visit(const ir::operation::Cast &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(input_index); + + assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); +} + +void OperationValidator::visit(const ir::operation::Comparison &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; + const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(lhs_index); + UNUSED_RELEASE(rhs_index); + + assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type()); + assert(_ctx.at(output_index).typeInfo().type() == ir::DataType::BOOL8); +} + +void OperationValidator::visit(const ir::operation::Softmax &node) +{ + VERBOSE(Softmax) << "Configure SOFTMAX operation" << std::endl; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(input_index); + + assert(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); +} + +void OperationValidator::visit(const ir::operation::InstanceNorm &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; + const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; + const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; + + 
UNUSED_RELEASE(ofm_index); + UNUSED_RELEASE(ifm_index); + UNUSED_RELEASE(gamma_index); + UNUSED_RELEASE(beta_index); + + assert(_ctx.at(ifm_index).shape().rank() == 4); + assert(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape()); + assert(_ctx.at(gamma_index).shape().rank() == 1); + assert(_ctx.at(beta_index).shape().rank() == 1); +} + +void OperationValidator::visit(const ir::operation::Permute &node) +{ + VERBOSE(Permute) << "Configure Permute operation" << std::endl; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(input_index); + + assert(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); +} + +void OperationValidator::visit(const ir::operation::ReduceSum &node) +{ + VERBOSE(Permute) << "Configure ReduceSum operation" << std::endl; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)}; + const auto &axes = node.param().axes; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(input_index); + UNUSED_RELEASE(axes); + + const auto input_shape = _ctx.at(input_index).shape(); + const auto output_shape = _ctx.at(output_index).shape(); + + UNUSED_RELEASE(output_shape); + UNUSED_RELEASE(input_shape); + + assert(input_shape.rank() <= 4); + assert(output_shape.rank() <= input_shape.rank()); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). 
The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. + if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank()) + { + if (output_shape.rank() == 2) + { + // Reducing HW + assert(input_shape.dim(0) == output_shape.dim(0) && + input_shape.dim(3) == output_shape.dim(1)); + } + else if (output_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1) + assert((input_shape.dim(0) == output_shape.dim(0) && + input_shape.dim(1) == output_shape.dim(1) && + input_shape.dim(2) == output_shape.dim(2)) || + (input_shape.dim(0) == output_shape.dim(0) && + (input_shape.dim(1) == output_shape.dim(1) || + input_shape.dim(2) == output_shape.dim(1)) && + input_shape.dim(3) == 1 && output_shape.dim(2) == 1)); + } + } +} + +void OperationValidator::visit(const ir::operation::Transpose &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; + const auto &perm{node.param().perm}; + + const auto &output_shape = _ctx.at(output_index).shape(); + const auto &input_shape = _ctx.at(input_index).shape(); + + UNUSED_RELEASE(output_shape); + UNUSED_RELEASE(input_shape); + UNUSED_RELEASE(perm); + + assert(input_shape.rank() == static_cast<int>(perm.size())); + assert(input_shape.rank() == output_shape.rank()); +} + +void OperationValidator::visit(const ir::operation::ReduceMax &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)}; + const auto &axes = node.param().axes; + + auto 
output_shape = _ctx.at(output_index).shape(); + auto input_shape = _ctx.at(input_index).shape(); + + UNUSED_RELEASE(output_shape); + UNUSED_RELEASE(input_shape); + UNUSED_RELEASE(axes); + + assert(input_shape.rank() <= 4); + assert(output_shape.rank() <= input_shape.rank()); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank()) + { + if (output_shape.rank() == 2) + { + // Reducing HW + assert(input_shape.dim(0) == output_shape.dim(0) && + input_shape.dim(3) == output_shape.dim(1)); + } + else if (output_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert((input_shape.dim(0) == output_shape.dim(0) && + input_shape.dim(1) == output_shape.dim(1) && + input_shape.dim(2) == output_shape.dim(2)) || + (input_shape.dim(0) == output_shape.dim(0) && + (input_shape.dim(1) == output_shape.dim(1) || + input_shape.dim(2) == output_shape.dim(1)) && + input_shape.dim(3) == 1 && output_shape.dim(2) == 1)); + } + } +} + +void OperationValidator::visit(const ir::operation::RNN &node) +{ + // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn + // TODO Support dynamic rnn + const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)}; + const auto hidden_state_out_index{ + node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)}; + + const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)}; + const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)}; + const auto recurrent_weights_index{ + node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)}; + const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)}; + const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)}; + + const auto batch_size = _ctx.at(output_index).shape().dim(0); + const auto num_units = _ctx.at(output_index).shape().dim(1); + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(hidden_state_out_index); + UNUSED_RELEASE(input_index); + UNUSED_RELEASE(weights_index); + UNUSED_RELEASE(recurrent_weights_index); + UNUSED_RELEASE(bias_index); + UNUSED_RELEASE(hidden_state_in_index); + UNUSED_RELEASE(batch_size); + 
UNUSED_RELEASE(num_units); + + assert(_ctx.at(output_index).shape().rank() == 2 && + _ctx.at(hidden_state_out_index).shape().rank() == 2 && + _ctx.at(input_index).shape().rank() == 2 && _ctx.at(weights_index).shape().rank() == 2 && + _ctx.at(recurrent_weights_index).shape().rank() == 2 && + _ctx.at(hidden_state_in_index).shape().rank() == 2); + assert(_ctx.at(bias_index).shape().rank() == 1); + + assert(batch_size == _ctx.at(input_index).shape().dim(0) && + batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) && + batch_size == _ctx.at(hidden_state_out_index).shape().dim(0)); + assert(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1)); + + assert(num_units == _ctx.at(weights_index).shape().dim(0) && + num_units == _ctx.at(recurrent_weights_index).shape().dim(0) && + num_units == _ctx.at(bias_index).shape().dim(0)); + assert(num_units == _ctx.at(output_index).shape().dim(1) && + num_units == _ctx.at(recurrent_weights_index).shape().dim(1) && + num_units == _ctx.at(hidden_state_in_index).shape().dim(1) && + num_units == _ctx.at(hidden_state_out_index).shape().dim(1)); +} + +void OperationValidator::visit(const ir::operation::SpaceToBatchND &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; + const auto block_size_index{ + node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; + const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; + + const auto frontend_layout = _current_subg_layout; + const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + + UNUSED_RELEASE(input_shape); + UNUSED_RELEASE(output_shape); + + // All assertions as per NNAPI specification. 
+ assert(_ctx.at(ifm_index).shape().rank() == 4); + assert(_ctx.at(ofm_index).shape().rank() == 4); + assert(_ctx.at(block_size_index).shape().rank() == 1); + assert(_ctx.at(paddings_index).shape().rank() == 2); + + assert(_ctx.at(block_size_index).shape().dim(0) == 2); + assert(_ctx.at(paddings_index).shape().dim(0) == 2); + assert(_ctx.at(paddings_index).shape().dim(1) == 2); + + assert(_ctx.at(block_size_index).isConstant()); + assert(_ctx.at(paddings_index).isConstant()); + + assert(input_shape.C == output_shape.C); +} + +void OperationValidator::visit(const ir::operation::SpaceToDepth &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; + + const auto frontend_layout = _current_subg_layout; + const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + const auto block_size = node.param().block_size; + + UNUSED_RELEASE(input_shape); + UNUSED_RELEASE(output_shape); + UNUSED_RELEASE(block_size); + + // All assertions as per NNAPI specification. 
+ assert(_ctx.at(ifm_index).shape().rank() == 4); + assert(_ctx.at(ofm_index).shape().rank() == 4); + assert((block_size >= 1) && (input_shape.H % block_size == 0) && + (input_shape.W % block_size == 0)); + assert(input_shape.N == output_shape.N); + assert(input_shape.C * block_size * block_size == output_shape.C); +} + +void OperationValidator::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + + const auto &output_obj = _ctx.at(output_index); + const auto &lookups_obj = _ctx.at(lookups_index); + const auto &values_obj = _ctx.at(values_index); + + UNUSED_RELEASE(output_obj); + UNUSED_RELEASE(lookups_obj); + UNUSED_RELEASE(values_obj); + + // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying + // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729) + { + assert(lookups_obj.typeInfo().type() == ir::DataType::INT32); + + const auto &output_shape = output_obj.shape(); + const auto &lookups_shape = lookups_obj.shape(); + const auto &values_shape = values_obj.shape(); + + UNUSED_RELEASE(output_shape); + UNUSED_RELEASE(lookups_shape); + UNUSED_RELEASE(values_shape); + + assert(lookups_shape.rank() == 1); + assert(values_shape.rank() >= 2); + + // output should be a n-D tensor with the same rank and shape as the values tensor, except for + // the first dimension which has the same size as lookups' only dimension. 
+ assert(output_shape.rank() == values_shape.rank()); + assert(output_shape.dim(0) == lookups_shape.dim(0)); + for (int n = 1; n < output_shape.rank(); ++n) + { + assert(output_shape.dim(n) == values_shape.dim(n)); + } + } +} + +void OperationValidator::visit(const ir::operation::Exp &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(input_index); + + assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); + assert(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type()); +} + +void OperationValidator::visit(const ir::operation::Floor &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(input_index); + + assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); + assert(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type()); +} + +void OperationValidator::visit(const ir::operation::HashtableLookup &node) +{ + const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)}; + const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)}; + + const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)}; + const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; + const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; + + const auto &output_obj = _ctx.at(output_index); + const auto &hits_obj = _ctx.at(hits_index); + + const auto &lookups_obj = _ctx.at(lookups_index); + const auto &keys_obj = _ctx.at(keys_index); + const auto &values_obj = _ctx.at(values_index); + + assert(lookups_obj.typeInfo().type() == ir::DataType::INT32); + 
assert(keys_obj.typeInfo().type() == ir::DataType::INT32); + assert(hits_obj.typeInfo().type() == ir::DataType::QUANT8_ASYMM); + + const auto &output_shape = output_obj.shape(); + const auto &hits_shape = hits_obj.shape(); + + const auto &lookups_shape = lookups_obj.shape(); + const auto &keys_shape = keys_obj.shape(); + const auto &values_shape = values_obj.shape(); + + UNUSED_RELEASE(output_shape); + UNUSED_RELEASE(hits_shape); + UNUSED_RELEASE(lookups_shape); + UNUSED_RELEASE(keys_shape); + UNUSED_RELEASE(values_shape); + + assert(values_shape.rank() == output_shape.rank()); + assert(lookups_shape.rank() == 1); + assert(keys_shape.rank() == 1); + assert(values_shape.dim(0) == keys_shape.dim(0)); + assert(lookups_shape.dim(0) == output_shape.dim(0)); +} + +void OperationValidator::visit(const ir::operation::TransposeConv &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; + const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; + + // Only 4D tensors are supported + assert(_ctx.at(ofm_index).shape().rank() == 4); + assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); + assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); + + const auto frontend_layout = _current_subg_layout; + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); + // The kernel has only IHWO layout on frontend + // So ker_shape is treated here below + // I -> N + // H -> H + // W -> W + // O -> C + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC); + + UNUSED_RELEASE(ofm_shape); + UNUSED_RELEASE(ifm_shape); + UNUSED_RELEASE(ker_shape); + + assert((node.param().padding.type == ir::PaddingType::SAME) || + (node.param().padding.type == ir::PaddingType::VALID)); + assert(ifm_shape.N == 
ofm_shape.N); + assert(ifm_shape.C == ker_shape.C); + assert(ker_shape.N == ofm_shape.C); +} + +void OperationValidator::visit(const ir::operation::Gather &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; + + const auto axis = node.param().axis; + + const auto ifm_shape = _ctx.at(ifm_index).shape(); + const auto indices_shape = _ctx.at(indices_index).shape(); + const auto ofm_shape = _ctx.at(ofm_index).shape(); + + UNUSED_RELEASE(ifm_shape); + UNUSED_RELEASE(indices_shape); + UNUSED_RELEASE(ofm_shape); + UNUSED_RELEASE(axis); + + assert(ifm_shape.rank() <= 4); + assert(indices_shape.rank() <= 3); + assert(ofm_shape.rank() <= 4); +} + +void OperationValidator::visit(const ir::operation::Dequantize &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(input_index); + + assert(_ctx.at(input_index).shape().rank() <= 4); + assert(_ctx.at(input_index).shape() == _ctx.at(output_index).shape()); + assert(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM); + assert(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32); +} + +void OperationValidator::visit(const ir::operation::Mean &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape(); + const auto ofm_shape = _ctx.at(ofm_index).shape(); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. 
+ // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. + if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2)) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } +} + +void OperationValidator::visit(const ir::operation::DepthToSpace &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + + const auto frontend_layout = _current_subg_layout; + const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout); + const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout); + + UNUSED_RELEASE(output_shape); + UNUSED_RELEASE(input_shape); + + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + + int32_t block_size = 
node.param().block_size; + + UNUSED_RELEASE(block_size); + + assert(block_size > 0); + + { // assertions block + assert(output_shape.N == input_shape.N); + assert(output_shape.H == input_shape.H * block_size); + assert(output_shape.W == input_shape.W * block_size); + assert(input_shape.C % (block_size * block_size) == 0); + assert(output_shape.C == input_shape.C / (block_size * block_size)); + } +} + +void OperationValidator::visit(const ir::operation::Pack &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto num{node.param().num}; + const auto axis{node.param().axis}; + + const auto &output_shape = _ctx.at(output_index).shape(); + const auto output_rank = static_cast<int32_t>(output_shape.rank()); + + const auto input1_index{node.getInputs().at(0)}; + const auto input_shape = _ctx.at(input1_index).shape(); + + UNUSED_RELEASE(num); + UNUSED_RELEASE(axis); + UNUSED_RELEASE(output_rank); + + assert(num == static_cast<int32_t>(node.getInputs().size())); + assert(axis >= -output_rank && axis < output_rank); + for (const auto &index : node.getInputs()) + { + UNUSED_RELEASE(index); + assert(input_shape == _ctx.at(index).shape()); + } +} + +void OperationValidator::visit(const ir::operation::ReduceMin &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)}; + const auto &axes = node.param().axes; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + + UNUSED_RELEASE(ifm_shape); + UNUSED_RELEASE(ofm_shape); + UNUSED_RELEASE(axes); + + assert(ifm_shape.rank() <= 4); + assert(ofm_shape.rank() <= ifm_shape.rank()); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. 
+ // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. + if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2)) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } +} + +void OperationValidator::visit(const ir::operation::LSTM &node) +{ + // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn + // TODO Support dynamic rnn + const auto scratch_buffer_index{ + node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; + const auto output_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; + const auto cell_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; + const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; + + const auto 
input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; + const auto input_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; + const auto input_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; + const auto input_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; + const auto input_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + const auto recurrent_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; + const auto recurrent_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; + const auto recurrent_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; + const auto recurrent_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + const auto cell_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; + const auto cell_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; + const auto cell_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; + const auto input_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; + const auto forget_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; + const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; + const auto output_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; + const auto projection_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; + const auto projection_bias_index{ + 
node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; + const auto output_state_in_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; + const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; + + UNUSED_RELEASE(scratch_buffer_index); + UNUSED_RELEASE(output_state_out_index); + UNUSED_RELEASE(cell_state_out_index); + UNUSED_RELEASE(output_index); + + UNUSED_RELEASE(input_index); + UNUSED_RELEASE(input_to_input_weights_index); + UNUSED_RELEASE(input_to_forget_weights_index); + UNUSED_RELEASE(input_to_cell_weights_index); + UNUSED_RELEASE(input_to_output_weights_index); + UNUSED_RELEASE(recurrent_to_input_weights_index); + UNUSED_RELEASE(recurrent_to_forget_weights_index); + UNUSED_RELEASE(recurrent_to_cell_weights_index); + UNUSED_RELEASE(recurrent_to_output_weights_index); + UNUSED_RELEASE(cell_to_input_weights_index); + UNUSED_RELEASE(cell_to_forget_weights_index); + UNUSED_RELEASE(cell_to_output_weights_index); + UNUSED_RELEASE(input_gate_bias_index); + UNUSED_RELEASE(forget_gate_bias_index); + UNUSED_RELEASE(cell_bias_index); + UNUSED_RELEASE(output_gate_bias_index); + UNUSED_RELEASE(projection_weights_index); + UNUSED_RELEASE(projection_bias_index); + UNUSED_RELEASE(output_state_in_index); + UNUSED_RELEASE(cell_state_in_index); + + assert(_ctx.at(scratch_buffer_index).shape().rank() == 2 && + _ctx.at(output_state_out_index).shape().rank() == 2 && + _ctx.at(cell_state_out_index).shape().rank() == 2 && + _ctx.at(output_index).shape().rank() == 2 && _ctx.at(input_index).shape().rank() == 2 && + _ctx.at(input_to_input_weights_index).shape().rank() == 2 && + _ctx.at(input_to_forget_weights_index).shape().rank() == 2 && + _ctx.at(input_to_cell_weights_index).shape().rank() == 2 && + _ctx.at(input_to_output_weights_index).shape().rank() == 2 && + _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2 && + _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 && + 
_ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 && + _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 && + _ctx.at(projection_weights_index).shape().rank() == 2 && + _ctx.at(output_state_in_index).shape().rank() == 2 && + _ctx.at(cell_state_in_index).shape().rank() == 2); + + assert(_ctx.at(cell_to_input_weights_index).shape().rank() == 1 && + _ctx.at(cell_to_forget_weights_index).shape().rank() == 1 && + _ctx.at(cell_to_output_weights_index).shape().rank() == 1 && + _ctx.at(input_gate_bias_index).shape().rank() == 1 && + _ctx.at(forget_gate_bias_index).shape().rank() == 1 && + _ctx.at(cell_bias_index).shape().rank() == 1 && + _ctx.at(output_gate_bias_index).shape().rank() == 1 && + _ctx.at(projection_bias_index).shape().rank() == 1); + + // CIFG assertion + assert((_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 && + _ctx.at(input_to_input_weights_index).shape().dim(1) == 0 && + _ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 && + _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0 && + _ctx.at(input_gate_bias_index).shape().dim(0) == 0 && + _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) || + (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(input_to_input_weights_index).shape().dim(1) != 0 && + _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0 && + _ctx.at(input_gate_bias_index).shape().dim(0) != 0)); + + // Peephole assertion + assert((_ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0 && + _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0) || + (_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0 && + _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0)); + + bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; + bool has_recurrent_to_input_weights = + 
_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + bool has_input_gate_bias = _ctx.at(input_gate_bias_index).shape().dim(0) != 0; + bool has_cell_to_input_weights = _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0; + bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; + bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; + bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && + _ctx.at(projection_weights_index).shape().dim(1) != 0; + bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); + + // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG). + // true: no CIFG + // false: CIFG + bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; + + // NOTE The cell_to_input_weights do not exist in regular CIFG although peephole. + // true: peephole + // false: no peephole + bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; + + // NOTE The projection weights may have data but the projection bias may not. 
+ bool has_projection_param = has_projection_weights; + + UNUSED_RELEASE(has_input_to_input_weights); + UNUSED_RELEASE(has_recurrent_to_input_weights); + UNUSED_RELEASE(has_input_gate_bias); + UNUSED_RELEASE(has_cell_to_input_weights); + UNUSED_RELEASE(has_cell_to_forget_weights); + UNUSED_RELEASE(has_cell_to_output_weights); + UNUSED_RELEASE(has_projection_weights); + UNUSED_RELEASE(has_projection_bias); + UNUSED_RELEASE(has_cifg_param); + UNUSED_RELEASE(has_peephole_param); + UNUSED_RELEASE(has_projection_param); + + const auto batch_size = _ctx.at(input_index).shape().dim(0); + UNUSED_RELEASE(batch_size); + assert(batch_size == _ctx.at(output_state_in_index).shape().dim(0) && + batch_size == _ctx.at(cell_state_in_index).shape().dim(0) && + batch_size == _ctx.at(scratch_buffer_index).shape().dim(0) && + batch_size == _ctx.at(output_state_out_index).shape().dim(0) && + batch_size == _ctx.at(cell_state_out_index).shape().dim(0) && + batch_size == _ctx.at(output_index).shape().dim(0)); + + const auto input_size = _ctx.at(input_index).shape().dim(1); + UNUSED_RELEASE(input_size); + assert(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) && + input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) && + input_size == _ctx.at(input_to_output_weights_index).shape().dim(1)); + + const auto num_units = _ctx.at(cell_state_out_index).shape().dim(1); + UNUSED_RELEASE(num_units); + assert(num_units == _ctx.at(input_to_forget_weights_index).shape().dim(0) && + num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) && + num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) && + num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) && + num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) && + num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) && + num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) && + num_units == _ctx.at(cell_bias_index).shape().dim(0) && + 
num_units == _ctx.at(output_gate_bias_index).shape().dim(0) && + num_units == _ctx.at(cell_state_in_index).shape().dim(1) && + (((num_units * 3) == _ctx.at(scratch_buffer_index).shape().dim(1)) || + ((num_units * 4) == _ctx.at(scratch_buffer_index).shape().dim(1)))); + + const auto output_size = _ctx.at(output_index).shape().dim(1); + UNUSED_RELEASE(output_size); + assert(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) && + output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) && + output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) && + output_size == _ctx.at(output_state_in_index).shape().dim(1) && + output_size == _ctx.at(output_state_out_index).shape().dim(1)); + + if (has_cifg_param) + { + assert(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1)); + assert(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) && + num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) && + (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) || + _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* non-peephole */) && + num_units == _ctx.at(input_gate_bias_index).shape().dim(0)); + assert(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1)); + assert(has_input_to_input_weights && has_recurrent_to_input_weights && has_input_gate_bias); + if (has_cell_to_input_weights) + { + // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole. 
+ assert(has_peephole_param); + } + assert(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4); + } + else + { + assert(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3); + } + + if (has_peephole_param) + { + assert(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) && + num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) && + (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) || + _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */)); + } + + if (has_projection_param) + { + assert(num_units == _ctx.at(projection_weights_index).shape().dim(1)); + assert(output_size == _ctx.at(projection_weights_index).shape().dim(0)); + if (has_projection_bias) + { + assert(output_size == _ctx.at(projection_bias_index).shape().dim(0)); + } + } +} + +void OperationValidator::visit(const ir::operation::Unpack &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)}; + const auto num{node.param().num}; + const auto axis{node.param().axis}; + + const auto &input_shape = _ctx.at(input_index).shape(); + const auto input_rank = static_cast<int32_t>(input_shape.rank()); + + UNUSED_RELEASE(num); + UNUSED_RELEASE(axis); + UNUSED_RELEASE(input_rank); + + assert(num == static_cast<int32_t>(node.getOutputs().size())); + assert(axis >= -input_rank && axis < input_rank); +} + +void OperationValidator::visit(const ir::operation::Pad &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; + const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)}; + const auto output_index{node.getInputs().at(0)}; + + const auto &pad_shape = _ctx.at(pad_index).shape(); + const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank()); + + UNUSED_RELEASE(pad_shape); + UNUSED_RELEASE(input_rank); + UNUSED_RELEASE(output_index); + + assert(pad_shape.rank() == 2); + assert(pad_shape.dim(0) == input_rank); + assert(pad_shape.dim(1) 
== 2); + assert(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32); + assert(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); +} + +void OperationValidator::visit(const ir::operation::Min &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(lhs_index); + UNUSED_RELEASE(rhs_index); + + assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type()); + assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type()); +} + +void OperationValidator::visit(const ir::operation::Max &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; + + UNUSED_RELEASE(output_index); + UNUSED_RELEASE(lhs_index); + UNUSED_RELEASE(rhs_index); + + assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type()); + assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type()); +} + +} // namespace compiler +} // namespace neurun diff --git a/runtime/neurun/core/src/compiler/OperationValidator.h b/runtime/neurun/core/src/compiler/OperationValidator.h new file mode 100644 index 000000000..6ff3c7815 --- /dev/null +++ b/runtime/neurun/core/src/compiler/OperationValidator.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file OperationValidator.h
 * @brief Operation visitor that asserts machine-independent validity
 *        (ranks, shapes, data types) of each supported operation in a graph.
 * @note  All checks are assert()-based, so they are compiled out in
 *        release (NDEBUG) builds.
 */
#ifndef __NEURUN_COMPILER_OPERATION_VALIDATOR_H__
#define __NEURUN_COMPILER_OPERATION_VALIDATOR_H__

#include "ir/Layout.h"
#include "ir/OperationVisitor.h"

// Forward declarations only: the full Graph/Operands definitions are needed
// by the implementation file, not by this interface.
namespace neurun
{
namespace ir
{
class Graph;
class Operands;
} // namespace ir
} // namespace neurun

namespace neurun
{
namespace compiler
{

/**
 * @brief Validates operand shapes/types for every operation it visits.
 *
 * Usage: construct with a graph, then invoke operator() to run validation
 * over all operations. Operations without an overridden visit() are not
 * checked.
 */
class OperationValidator : public ir::OperationVisitor
{
public:
  OperationValidator(void) = delete;
  /// @param[in] graph Graph whose operations will be validated
  OperationValidator(const ir::Graph &graph);

public:
  /// @brief Run validation over every operation in the graph
  void operator()();

public:
  void visit(const ir::operation::BatchToSpaceND &node) override;
  void visit(const ir::operation::Cast &node) override;
  void visit(const ir::operation::Comparison &node) override;
  void visit(const ir::operation::Softmax &node) override;
  void visit(const ir::operation::InstanceNorm &node) override;
  void visit(const ir::operation::Permute &node) override;
  void visit(const ir::operation::ReduceSum &node) override;
  void visit(const ir::operation::Transpose &node) override;
  void visit(const ir::operation::ReduceMax &node) override;
  void visit(const ir::operation::RNN &node) override;
  void visit(const ir::operation::SpaceToBatchND &node) override;
  void visit(const ir::operation::SpaceToDepth &node) override;
  void visit(const ir::operation::EmbeddingLookup &node) override;
  void visit(const ir::operation::Exp &node) override;
  void visit(const ir::operation::Floor &node) override;
  void visit(const ir::operation::HashtableLookup &node) override;
  void visit(const ir::operation::TransposeConv &node) override;
  void visit(const ir::operation::Gather &node) override;
  void visit(const ir::operation::Dequantize &node) override;
  void visit(const ir::operation::Mean &node) override;
  void visit(const ir::operation::DepthToSpace &node) override;
  void visit(const ir::operation::Pack &node) override;
  void visit(const ir::operation::ReduceMin &node) override;
  void visit(const ir::operation::LSTM &node) override;
  void visit(const ir::operation::Unpack &node) override;
  void visit(const ir::operation::Pad &node) override;
  void visit(const ir::operation::Min &node) override;
  void visit(const ir::operation::Max &node) override;

private:
  // TODO Remove _ctx field
  const ir::Graph &_graph;
  const ir::Operands &_ctx;        // shortcut to _graph's operand set
  ir::Layout _current_subg_layout; // layout of the op sequence being visited
};

} // namespace compiler
} // namespace neurun

#endif // __NEURUN_COMPILER_OPERATION_VALIDATOR_H__

/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#include "ParamChecker.h" + +#include "ir/Graph.h" + +namespace neurun +{ +namespace compiler +{ + +void ParamChecker::operator()() +{ + _model->operations().iterate( + [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); +} + +} // namespace compiler +} // namespace neurun diff --git a/runtime/neurun/core/src/compiler/ParamChecker.h b/runtime/neurun/core/src/compiler/ParamChecker.h new file mode 100644 index 000000000..7e88ff185 --- /dev/null +++ b/runtime/neurun/core/src/compiler/ParamChecker.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file ParamChecker.h + * @brief This file contains ParamChecker to check\n + * operations' parameters are compilable at machine independent phase\n + * ex) Check param is constant + */ +#ifndef __NEURUN_COMPILER_PARAM_CHECKER_H__ +#define __NEURUN_COMPILER_PARAM_CHECKER_H__ + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +class Graph; +} // namespace ir +} // namespace neurun + +namespace neurun +{ +namespace compiler +{ + +class ParamChecker : public ir::OperationVisitor +{ +public: + /** + * @brief Construct a new Param Checker object (deleted) + */ + ParamChecker(void) = delete; + /** + * @brief Construct a new Param Checker object + * @param[in] model Graph model to check + */ + ParamChecker(std::shared_ptr<ir::Graph> model) : _model{model} {} + +public: + /** + * @brief Run parameter analysis + */ + void operator()(); + /** + * @brief Return analysis result if model have non-const parameter + * @return @c true if there is non-const parameter, otherwise @c false + */ + bool haveNoneConstParam(void) { return _nonConstParam; } + +private: + const std::shared_ptr<ir::Graph> _model; + bool _nonConstParam{false}; +}; + +} // namespace compiler +} // namespace neurun + +#endif // __NEURUN_COMPILER_OPERATION_VALIDATOR_H__ diff --git a/runtime/neurun/core/src/compiler/SubTensorAnalyzer.cc b/runtime/neurun/core/src/compiler/SubTensorAnalyzer.cc new file mode 100644 index 000000000..dae1a74ff --- /dev/null +++ b/runtime/neurun/core/src/compiler/SubTensorAnalyzer.cc @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "SubTensorAnalyzer.h"

#include <typeinfo>

#include "cpp14/memory.h"
#include "ir/OperandIndexSequence.h"
#include "util/logging.h"
#include "util/Coordinates.h"

namespace neurun
{
namespace compiler
{

// Marks each Concat input operand as a sub-tensor of the Concat output by
// attaching a ParentInfo (parent operand index + offset coordinates), so a
// backend can place the inputs directly inside the output buffer and elide
// the copy. If any input is unsuitable, the whole Concat is left untouched.
void SubTensorAnalyzer::visit(const ir::operation::Concat &node)
{
  // If operator is concat (or other operators related with subsumption), fill subsumption info
  // TODO: if one tensor is subset of many parents or model input
  // Solution 1. Handle 1st parent only, ignore others (need to invert for other children)
  // Solution 2. Insert copy operation for other parents
  int32_t axis_raw = node.param().axis;

  const auto &output_index = node.getOutputs().at(0);
  const auto &inputs = node.getInputs();

  int32_t axis_point = 0;
  const auto rank = _graph.operands().at(output_index).shape().rank();
  // Negative axis counts from the back; normalize to [0, rank).
  int32_t axis = axis_raw < 0 ? (axis_raw + rank) : axis_raw;
  assert(rank > axis);

  // First pass: bail out (without modifying anything) if any input cannot be
  // subsumed, so that ParentInfo is only ever set for a fully-eligible Concat.
  for (const auto &input_index : inputs)
  {
    // NOTE Not support multiple parent tensor yet
    //
    // Let us consider the following example (where OP_i is not a CONCAT):
    //
    // %0 = OP_0
    // %1 = OP_1
    // %2 = OP_2
    // %3 = CONCAT(%0, %1)
    // %4 = CONCAT(%0, %2)
    //
    // %0 and %1 SHOULD be consecutive in order to eliminate the former CONCAT operation,
    // which makes it impossible to eliminate the latter CONCAT operation.
    // - Note that %0 and %2 cannot be consecutive.
    if (_graph.operands().at(input_index).parent_info() != nullptr)
    {
      return;
    }

    // NOTE Not support the case that concat's input is a constant or a input of model
    if (_graph.operands().at(input_index).isConstant() || _graph.getInputs().contains(input_index))
    {
      return;
    }
  }

  // Second pass: attach ParentInfo to each input. The offset along `axis`
  // accumulates the sizes of the preceding inputs (axis_point).
  for (const auto &input_index : inputs)
  {
    auto input_shape = _graph.operands().at(input_index).shape();
    assert(rank == input_shape.rank());

    // All coordinates are zero except the concat axis, which is the running
    // offset of this input inside the output tensor.
    neurun::util::Coordinates coordinate_info{};
    for (int i = 0; i < rank; i++)
    {
      coordinate_info.set(i, 0);
    }
    coordinate_info.set(axis, axis_point);

    auto parentInfo =
        nnfw::cpp14::make_unique<ir::operand::ParentInfo>(output_index, coordinate_info);

    _graph.operands().at(input_index).parent_info(std::move(parentInfo));

    axis_point += input_shape.dim(axis);
  }
}

} // namespace compiler
} // namespace neurun

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 */

/**
 * @file SubTensorAnalyzer.h
 * @brief This file contains SubTensorAnalyzer to analyze tensor subsumption
 *        using operation visitor
 */

#ifndef __NEURUN_COMPILER_SUBTENSOR_ANALYZER_H__
#define __NEURUN_COMPILER_SUBTENSOR_ANALYZER_H__

#include "ir/OperationVisitor.h"
#include "ir/Graph.h"

namespace neurun
{
namespace compiler
{

/**
 * @brief Class to analyze tensor subsumption
 *
 * Visits operations whose inputs can live inside their output buffer
 * (currently only Concat) and records the parent/offset relation on the
 * input operands so backends can avoid materializing separate tensors.
 */
class SubTensorAnalyzer : public ir::OperationVisitor
{
public:
  /**
   * @brief Construct a new SubTensorAnalyzer object
   * @param[in] ctx Graph operand set
   */
  SubTensorAnalyzer(ir::Graph &graph) : _graph{graph}
  {
    // DO NOTHING
  }

public:
  // Only Concat is analyzed; all other operations are ignored.
  void visit(const ir::operation::Concat &) override;

private:
  ir::Graph &_graph; // TODO Refactor : Do not update Operands
};

} // namespace compiler
} // namespace neurun

#endif // __NEURUN_COMPILER_SUBTENSOR_ANALYZER_H__

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#include "DotBuilder.h" + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +// DotDumper +DotBuilder::DotBuilder() {} + +void DotBuilder::update(const Node &node_info) +{ + add(node_info); + for (auto edge : node_info.edges()) + { + addEdge(node_info, *edge); + } +} + +void DotBuilder::addSubgraph(const DotSubgraphInfo &subgraph_info) +{ + _dot << "op_seq cluster_" << subgraph_info.index().value() << " {\n"; + _dot << " label=\"" << subgraph_info.label() << "\";\n"; + _dot << " style=filled;\n"; + _dot << " color=lightgrey;\n"; + _dot << " "; + for (auto op : subgraph_info.operations()) + { + _dot << "operation" << op.value() << "; "; + } + for (auto op : subgraph_info.operands()) + { + _dot << "operand" << op.value() << "; "; + } + _dot << "\n"; + _dot << "}\n"; +} + +void DotBuilder::writeDot(std::ostream &os) +{ + os << "digraph D {\n" + << _dot.str() << "\n" + << "}\n"; +} + +void DotBuilder::add(const Node &node) +{ + _dot << node.id(); + std::stringstream ss; + _dot << "["; + for (auto attr : node.attributes()) + { + _dot << attr.first << "=\"" << attr.second << "\" "; + } + _dot << "];\n"; +} + +void DotBuilder::addEdge(const Node &node1, const Node &node2) +{ + _dot << node1.id() << " -> " << node2.id() << ";\n"; +} + +} // namespace dot +} // namespace dumper +} // namespace neurun diff --git a/runtime/neurun/core/src/dumper/dot/DotBuilder.h b/runtime/neurun/core/src/dumper/dot/DotBuilder.h new file mode 100644 index 000000000..c04f6bc52 --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/DotBuilder.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NEURUN_DUMPER_DOT_DOT_BUILDER_H__
#define __NEURUN_DUMPER_DOT_DOT_BUILDER_H__

#include <sstream>

#include "ir/Index.h"
#include "ir/Operation.h"
#include "ir/Operand.h"

#include "OperationNode.h"
#include "OperandNode.h"
#include "DotSubgraphInfo.h"

// NOTE(review): these aliases live at global scope in a header, which leaks
// `Operation`/`Object` into every including TU — consider moving them inside
// the neurun::dumper::dot namespace (left as-is; other TUs may rely on them).
using Operation = neurun::ir::Operation;
using Object = neurun::ir::Operand;

namespace neurun
{
namespace dumper
{
namespace dot
{

/**
 * @brief Incrementally accumulates Graphviz DOT statements (nodes, edges,
 *        clusters) in an internal buffer and writes the finished digraph
 *        to an output stream.
 */
class DotBuilder
{
public:
  DotBuilder();

public:
  // Add a node declaration plus all of its outgoing edges.
  void update(const Node &dotinfo);
  // Add a cluster for one op sequence.
  void addSubgraph(const DotSubgraphInfo &subgraph_info);

  // Write the accumulated statements wrapped in "digraph D { ... }".
  void writeDot(std::ostream &os);

private:
  void add(const Node &dotinfo);
  void addEdge(const Node &dotinfo1, const Node &dotinfo2);

  std::stringstream _dot; // accumulated DOT statement text
};

} // namespace dot
} // namespace dumper
} // namespace neurun

#endif // __NEURUN_DUMPER_DOT_DOT_BUILDER_H__

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <fstream>
#include <unordered_map>

#include "DotDumper.h"
#include "DotBuilder.h"
#include "DotSubgraphInfo.h"
#include "ir/OpSequence.h"
#include "ir/OperationIndexMap.h"
#include "backend/Backend.h"
#include "backend/BackendManager.h"
#include "backend/IConfig.h"

namespace neurun
{
namespace dumper
{
namespace dot
{

// Dump the graph as "<tag>.dot": build one DOT node per operation and per
// shown operand, group operations into clusters by op sequence, color nodes
// by assigned backend, and write the result to a file in the CWD.
void DotDumper::dump(const std::string &tag)
{
  if (_level == Level::OFF)
  {
    return;
  }

  neurun::dumper::dot::DotBuilder dot_builder;

  auto &operations = _graph.operations();
  auto &operands = _graph.operands();

  ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes;
  std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes;

  // Create a DOT node for every operation, with an edge to each output operand.
  operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) {
    auto node = nnfw::cpp14::make_unique<Operation>(index, op);

    for (auto output : op.getOutputs())
    {
      using neurun::dumper::dot::Operand;
      auto child = std::make_shared<Operand>(output, Operand::Type::MODEL_OUTPUT);
      node->addEdge(child);
    }

    operation_nodes.emplace(index, std::move(node));
  });

  // Map each backend to a fill color; the table is built once (static) from
  // the registered backends.
  auto backend_to_fillcolor = [](const backend::Backend *backend) {
    static const auto map = []() {
      std::unordered_map<const backend::Backend *, std::string> ret;
      uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
      for (const auto backend : backend::BackendManager::get().getAll())
      {
        ret.emplace(backend, Node::BG_COLORS[index]);
        index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
      }
      return ret;
    }();

    auto itr = map.find(backend);
    if (itr == map.end())
    {
      return Node::DEFAULT_FILLCOLOR;
    }
    else
    {
      return itr->second;
    }
  };

  util::Set<ir::OperandIndex> shown_operand_set;

  // Create a DOT node for each operand that should be shown at this level:
  // ALL shows everything; otherwise constants are hidden, and unused
  // constants/model inputs are always hidden.
  operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) {
    bool showing_cond = false;
    if (_level == Level::ALL)
    {
      showing_cond = true;
    }
    else
    {
      showing_cond = !object.isConstant();
    }
    if (object.isConstant() || _graph.getInputs().contains(index))
    {
      showing_cond = showing_cond && (object.getUses().size() > 0);
    }
    if (showing_cond)
    {
      shown_operand_set.add(index);

      auto type = [&]() {
        using neurun::dumper::dot::Operand;
        if (_graph.getInputs().contains(index))
          return Operand::Type::MODEL_INPUT;
        if (_graph.getOutputs().contains(index))
          return Operand::Type::MODEL_OUTPUT;
        return Operand::Type::INTERNAL;
      }();

      auto lower_info = _graph.getLowerInfo(index);
      auto node = nnfw::cpp14::make_unique<Operand>(index, type);

      {
        // Display LowerInfo attributes
        std::string label = std::to_string(index.value());
        std::string fillcolor = "";
        if (lower_info)
        {
          const auto &def_factors = lower_info->def_factors();
          if (def_factors.size() > 0)
          {
            label += "\\n[";
            label += def_factors.getOnlyElement().backend()->config()->id();
            label += "]";

            fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
          }
        }
        node->setAttribute("label", label);
        node->setAttribute("fillcolor", fillcolor);
      }

      // Edge from this operand to every operation that consumes it.
      for (auto operation_index : object.getUses().list())
      {
        auto &operation = operations.at(operation_index);
        auto child = std::make_shared<Operation>(operation_index, operation);
        node->addEdge(child);
      }

      operand_nodes.emplace(index, std::move(node));
    }
  });

  // If the graph has been lowered into op sequences, draw each sequence as a
  // cluster labeled and colored by its assigned backend.
  const auto subgraphs = _graph.subgraphs();
  if (subgraphs)
  {
    subgraphs->iterate([&](const ir::SubgraphIndex &index, const ir::OpSequence &op_seq) {
      const auto lower_info = _graph.getLowerInfo(index);
      auto fillcolor = backend_to_fillcolor(lower_info->backend());
      std::string label =
          std::to_string(index.value()) + " [" + lower_info->backend()->config()->id() + "]";
      DotSubgraphInfo subgraph_info{index, op_seq, shown_operand_set};
      subgraph_info.label(label);
      subgraph_info.fillcolor(fillcolor);
      dot_builder.addSubgraph(subgraph_info);

      // Set fillcolor of all operations in the op_seq
      for (const auto &op : op_seq.operations())
      {
        auto found = operation_nodes.find(op.index);
        if (found != operation_nodes.end())
        {
          // NOTE(review): this inner `op` shadows the loop variable `op`
          // above — works, but renaming one of them would aid readability.
          auto &&op = found->second;
          op->setAttribute("fillcolor", fillcolor);
        }
      }
    });
  }

  for (const auto &e : operation_nodes)
    dot_builder.update(*e.second);
  for (const auto &e : operand_nodes)
    dot_builder.update(*e.second);

  // Dump to file
  {
    std::string file_name;
    file_name += tag;
    file_name += ".dot";
    std::filebuf fb;

    fb.open(file_name, std::ios::out);
    std::ostream os(&fb);

    dot_builder.writeDot(os);

    fb.close();
  }
}

} // namespace dot
} // namespace dumper
} // namespace neurun

/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Graph.h" + +#ifndef __NEURUN_DUMPER_DOT_DOT_DUMPER_H__ +#define __NEURUN_DUMPER_DOT_DOT_DUMPER_H__ + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +class DotDumper +{ +public: + enum Level + { + OFF = 0, //< Do not dump + ALL_BUT_CONSTANTS = 1, //< Emit all operations and operands but constants + ALL = 2 //< Emit all operations and operands + }; + +public: + DotDumper(const ir::Graph &graph, Level level) : _graph(graph), _level{level} {} + +public: + /** + * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set + * + * @param[in] tag The name of dot file that would be created + * @return N/A + */ + void dump(const std::string &tag); + +private: + const ir::Graph &_graph; + Level _level; +}; + +} // namespace dot +} // namespace dumper +} // namespace neurun + +#endif // __NEURUN_DUMPER_DOT_DOT_DUMPER_H__ diff --git a/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.cc b/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.cc new file mode 100644 index 000000000..8cfe35900 --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.cc @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "DotSubgraphInfo.h" + +#include <sstream> + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +DotSubgraphInfo::DotSubgraphInfo(const ir::SubgraphIndex &index, const ir::OpSequence &op_seq, + const util::Set<ir::OperandIndex> &shown_operands) + : _index{index} +{ + for (const auto &element : op_seq.operations()) + { + _operations.insert(element.index); + for (auto o : element.node->getInputs()) + { + // Must be a shown operand, not op_seq's inputs + if (shown_operands.contains(o) && !op_seq.getInputs().contains(o)) + { + _operands.insert(o); + } + } + for (auto o : element.node->getOutputs()) + { + // Must be a shown operand, not op_seq's inputs + if (shown_operands.contains(o) && !op_seq.getOutputs().contains(o)) + { + _operands.insert(o); + } + } + } +} + +} // namespace dot +} // namespace dumper +} // namespace neurun diff --git a/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.h b/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.h new file mode 100644 index 000000000..0aa7c6ddf --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__ +#define __NEURUN_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__ + +#include <unordered_set> + +#include "ir/Index.h" +#include "ir/OpSequence.h" +#include "util/Set.h" + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +class DotSubgraphInfo +{ +public: + DotSubgraphInfo(const ir::SubgraphIndex &index, const ir::OpSequence &op_seq, + const util::Set<ir::OperandIndex> &shown_operands); + + ir::SubgraphIndex index() const { return _index; } + std::string label() const { return _label; } + void label(const std::string &val) { _label = val; } + std::string fillcolor() const { return _fillcolor; } + void fillcolor(const std::string &val) { _fillcolor = val; } + const std::unordered_set<ir::OperationIndex> &operations() const { return _operations; } + const std::unordered_set<ir::OperandIndex> &operands() const { return _operands; } + +private: + ir::SubgraphIndex _index; + std::string _label; + std::string _fillcolor; + std::unordered_set<ir::OperationIndex> _operations; + std::unordered_set<ir::OperandIndex> _operands; +}; + +} // namespace dot +} // namespace dumper +} // namespace neurun + +#endif // __NEURUN_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__ diff --git a/runtime/neurun/core/src/dumper/dot/Node.cc b/runtime/neurun/core/src/dumper/dot/Node.cc new file mode 100644 index 000000000..166f0f40f --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/Node.cc @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Node.h" + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +const std::string Node::DEFAULT_COLORSCHEME = "x11"; +const std::string Node::DEFAULT_FILLCOLOR = "white"; +// RED, BLUE, GREEN, PURPLE, ORANGE, YELLOW, BROWN, PINK +const std::string Node::BG_COLORS[8] = {"1", "2", "3", "4", "5", "6", "7", "8"}; + +Node::Node(const std::string &id) : _id{id} +{ + // Set default values + _attributes["style"] = "filled"; + _attributes["colorscheme"] = DEFAULT_COLORSCHEME; + _attributes["fillcolor"] = DEFAULT_FILLCOLOR; +} + +void Node::setAttribute(const std::string &key, const std::string &val) { _attributes[key] = val; } + +std::string Node::getAttribute(const std::string &key) +{ + auto itr = _attributes.find(key); + if (itr == _attributes.end()) + { + return ""; + } + else + { + return itr->second; + } +} + +} // namespace dot +} // namespace dumper +} // namespace neurun diff --git a/runtime/neurun/core/src/dumper/dot/Node.h b/runtime/neurun/core/src/dumper/dot/Node.h new file mode 100644 index 000000000..364cb08a4 --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/Node.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Node.h + * @brief This file contains Node class + * @ingroup COM_AI_RUNTIME + * + */ + +#ifndef __NEURUN_DUMPER_DOT_NODE_H__ +#define __NEURUN_DUMPER_DOT_NODE_H__ + +#include <string> +#include <memory> +#include <vector> +#include <unordered_map> + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +enum BGCOLORS : int +{ + RED, + BLUE, + GREEN, + PUPLE, + ORANGE, + YELLOW, + BROWN, + PINK +}; + +/** + * @brief Class that represents a Node in "dot" format + * +*/ +class Node +{ +public: + const static std::string DEFAULT_FILLCOLOR; + const static std::string DEFAULT_COLORSCHEME; + const static std::string BG_COLORS[8]; + +public: + /** + * @brief Destroy the Node object + * + */ + virtual ~Node() = default; + + /** + * @brief Construct a new Node object + * + * @param id + */ + Node(const std::string &id); + + /** + * @brief return id + * + * @return id + */ + std::string id() const { return _id; } + + /** + * @brief return attributes + * + * @return const reference of attributes object + */ + const std::unordered_map<std::string, std::string> &attributes() const { return _attributes; } + /** + * @brief Store an attribute with key-value pair + * + * @param[in] key attribute's key + * @param[in] val attribute's value that is associated with the key + */ + void setAttribute(const std::string &key, const std::string &val); + /** + * @brief Get the attributte value that is associated with key + * + * @param[in] key key of the attribute + * @return value that is associated with the key + */ + std::string getAttribute(const std::string &key); + + /** + * @brief Add an edge in the graph, which is an outgoing edge + * + * @param[in] dotinfo A node that the new edge will be connected to + */ + void addEdge(std::shared_ptr<Node> dotinfo) { _children.emplace_back(dotinfo); } + /** + * @brief Return list of edges + * + * @return Edges + */ + const std::vector<std::shared_ptr<Node>> &edges() const { return _children; } + +private: + std::string 
_id; + std::unordered_map<std::string, std::string> _attributes; + std::vector<std::shared_ptr<Node>> _children; +}; + +} // namespace dot +} // namespace dumper +} // namespace neurun + +#endif // __NEURUN_DUMPER_DOT_NODE_H__ diff --git a/runtime/neurun/core/src/dumper/dot/OperandNode.cc b/runtime/neurun/core/src/dumper/dot/OperandNode.cc new file mode 100644 index 000000000..76d2c704c --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/OperandNode.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <sstream> + +#include "OperandNode.h" +#include "ir/Graph.h" +#include "ir/operand/LowerInfo.h" + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +const std::string Operand::INPUT_SHAPE = "doublecircle"; +const std::string Operand::OUTPUT_SHAPE = "doublecircle"; +const std::string Operand::OPERAND_SHAPE = "ellipse"; +const std::string Operand::BG_COLOR_SCHEME = "set18"; + +Operand::Operand(const ir::OperandIndex &index, Type type) + : Node{"operand" + std::to_string(index.value())} +{ + { + auto type_to_shape = [](Type type) { + switch (type) + { + case Type::MODEL_INPUT: + return INPUT_SHAPE; + case Type::MODEL_OUTPUT: + return OUTPUT_SHAPE; + case Type::UNDEFINED: + case Type::INTERNAL: + default: + return OPERAND_SHAPE; + } + }; + setAttribute("shape", type_to_shape(type)); + } + + setAttribute("colorscheme", BG_COLOR_SCHEME); +} + +} // namespace dot +} // namespace dumper +} // namespace neurun diff --git a/runtime/neurun/core/src/dumper/dot/OperandNode.h b/runtime/neurun/core/src/dumper/dot/OperandNode.h new file mode 100644 index 000000000..5ebd651b6 --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/OperandNode.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Operand.h + * @brief This file contains Operand + * @ingroup COM_AI_RUNTIME + * + */ + +#ifndef __NEURUN_DUMPER_DOT_DOT_OPERAND_INFO_H__ +#define __NEURUN_DUMPER_DOT_DOT_OPERAND_INFO_H__ + +#include <vector> + +#include "Node.h" +#include "ir/Operand.h" +#include "ir/Index.h" + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +/** + * @brief Class that represents an Operand + * + */ +class Operand : public Node +{ +public: + enum class Type + { + UNDEFINED, + MODEL_INPUT, + MODEL_OUTPUT, + INTERNAL + }; + +public: + static const std::string INPUT_SHAPE; + static const std::string OUTPUT_SHAPE; + static const std::string OPERAND_SHAPE; + static const std::string BG_COLOR_SCHEME; + +public: + /** + * @brief Construct a new Operand Node object + * + * @param[in] index Operand index + * @param[in] type Operand type + * @param[in] lower_info Operand LowerInfo + */ + Operand(const ir::OperandIndex &index, Type type); + +private: + void addBackendLabel(); +}; + +} // namespace dot +} // namespace dumper +} // namespace neurun + +#endif // __NEURUN_DUMPER_DOT_DOT_OPERAND_INFO_H__ diff --git a/runtime/neurun/core/src/dumper/dot/OperationNode.cc b/runtime/neurun/core/src/dumper/dot/OperationNode.cc new file mode 100644 index 000000000..ca870ba05 --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/OperationNode.cc @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <sstream> + +#include "OperationNode.h" +#include "ir/Graph.h" +#include "ir/operation/LowerInfo.h" +#include "backend/IConfig.h" +#include "backend/Backend.h" + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +const std::string Operation::OPERATION_SHAPE = "rect"; +const std::string Operation::BG_COLOR_SCHEME = "pastel18"; + +Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node) + : Node{"operation" + std::to_string(index.value())} +{ + setAttribute("label", std::to_string(index.value()) + " : " + node.name()); + setAttribute("shape", OPERATION_SHAPE); + setAttribute("colorscheme", BG_COLOR_SCHEME); + setAttribute("fillcolor", DEFAULT_FILLCOLOR); +} + +} // namespace dot +} // namespace dumper +} // namespace neurun diff --git a/runtime/neurun/core/src/dumper/dot/OperationNode.h b/runtime/neurun/core/src/dumper/dot/OperationNode.h new file mode 100644 index 000000000..ba0713790 --- /dev/null +++ b/runtime/neurun/core/src/dumper/dot/OperationNode.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Operation.h + * @brief This file contains Operation + * @ingroup COM_AI_RUNTIME + * + */ + +#ifndef __NEURUN_DUMPER_DOT_DOT_NODE_INFO_H__ +#define __NEURUN_DUMPER_DOT_DOT_NODE_INFO_H__ + +#include "Node.h" +#include "ir/Operation.h" +#include "ir/Index.h" + +namespace neurun +{ +namespace dumper +{ +namespace dot +{ + +/** + * @brief Class that represents an Operation + * + */ +class Operation : public Node +{ +public: + static const std::string OPERATION_SHAPE; + static const std::string BG_COLOR_SCHEME; + +public: + /** + * @brief Construct a new Operation Node object + * + * @param[in] index operation index + * @param[in] node operation object + */ + Operation(const ir::OperationIndex &index, const ir::Operation &node); +}; + +} // namespace dot +} // namespace dumper +} // namespace neurun + +#endif // __NEURUN_DUMPER_DOT_DOT_NODE_INFO_H__ diff --git a/runtime/neurun/core/src/exec/DataflowExecutor.cc b/runtime/neurun/core/src/exec/DataflowExecutor.cc new file mode 100644 index 000000000..e22d41031 --- /dev/null +++ b/runtime/neurun/core/src/exec/DataflowExecutor.cc @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "DataflowExecutor.h" + +#include <cassert> + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +int64_t DataflowExecutor::calculateRank(const std::vector<ir::Element> &operations) +{ + int64_t rank = 0; + if (!_indexed_ranks) + { + return rank; + } + for (const auto &element : operations) + { + auto it = _indexed_ranks->find(element.index); + if (it == _indexed_ranks->end()) + { + assert(element.node->opcode() == ir::OpCode::Permute); + // assign int32_t::max to prevent integer overflow + rank += std::numeric_limits<int32_t>::max(); + } + else + { + rank += it->second; + } + } + return rank; +} + +void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id) +{ + auto &job = _waiting_jobs[id]; + assert(job != nullptr); + auto &subg = _graph.subgraphs()->at(_job_to_op_seq[job->index()]); + auto rank = calculateRank(subg.operations()); + _ready_jobs.emplace(rank, std::move(job)); +} + +void DataflowExecutor::notify(uint32_t finished_job_id) +{ + for (auto id : _output_info[finished_job_id]) + { + assert(_input_info[id] > 0); + auto count = --_input_info[id]; + if (count == 0) // No dependent jobs left, ready for execution + { + emplaceToReadyJobs(id); + } + } +} +bool DataflowExecutor::noWaitingJobs() +{ + return std::all_of(_waiting_jobs.begin(), _waiting_jobs.end(), + [](const std::unique_ptr<Job> &job) { return job == nullptr; }); +} + +DataflowExecutor::DataflowExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, + CodeMap &&code_map) + : ExecutorBase{graph, operand_context, std::move(tensor_mgrs)}, _code_map{std::move(code_map)} +{ + VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; + + const ir::Subgraphs *subgraphs = _graph.subgraphs(); + // Assign jobs convert SubgraphIndex to job index(uint32_t) + uint32_t next_job_index = 0; + std::unordered_map<ir::SubgraphIndex, uint32_t> subgraph_to_job; 
+ subgraphs->iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &) { + VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with SubgraphIndex " + << subg_index.value() << std::endl; + _finished_jobs.emplace_back( + nnfw::cpp14::make_unique<Job>(next_job_index, _code_map.at(subg_index).get())); + subgraph_to_job[subg_index] = next_job_index++; + }); + + _waiting_jobs.resize(next_job_index); + _output_info.resize(next_job_index); + _initial_input_info.resize(next_job_index, 0); + + subgraphs->iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &subg) { + auto job_index = subgraph_to_job[subg_index]; + for (auto output : subg.getOutputs()) + { + // Update output and input info + subgraphs->iterate( + [&](const ir::SubgraphIndex &subg_cur_index, const ir::OpSequence &subg_cur) { + if (subg_cur.getInputs().contains(output)) + { + auto dep_index = subgraph_to_job[subg_cur_index]; + ++_initial_input_info[dep_index]; + _output_info[job_index].push_back(dep_index); + } + }); + } + }); + for (const auto &s : subgraph_to_job) + _job_to_op_seq.emplace(s.second, s.first); + + _input_info = _initial_input_info; +} + +void DataflowExecutor::executeImpl() +{ + assert(noWaitingJobs()); + + // Execution setup + _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs + + for (uint32_t i = 0; i < _waiting_jobs.size(); ++i) + { + if (_input_info[i] == 0) + { + emplaceToReadyJobs(i); + } + } + assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs + bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE); + + _subject.notifyModelBegin(this); + + while (!_ready_jobs.empty()) + { + auto job = std::move((_ready_jobs.begin())->second); + _ready_jobs.erase(_ready_jobs.begin()); + auto job_index = job->index(); + VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl; + + auto subgraph_index = _job_to_op_seq[job_index]; + auto op_seq = &_graph.subgraphs()->at(subgraph_index); + 
const backend::Backend *backend = + _graph.getLowerInfo()->operation.at(subgraph_index)->backend(); + + _subject.notifyJobBegin(this, op_seq, backend); + + if (is_profiling) + job->fn()->runSync(); + else + job->run(); + + _subject.notifyJobEnd(this, op_seq, backend); + notify(job_index); + _finished_jobs[job_index] = std::move(job); + } + assert(noWaitingJobs()); + + _subject.notifyModelEnd(this); + + // Reset input info for the next execution + _input_info = _initial_input_info; +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/DataflowExecutor.h b/runtime/neurun/core/src/exec/DataflowExecutor.h new file mode 100644 index 000000000..6c12093fd --- /dev/null +++ b/runtime/neurun/core/src/exec/DataflowExecutor.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ +#define __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ + +#include <list> +#include <map> +#include <unordered_map> + +#include "FunctionSequence.h" +#include "Job.h" +#include "ir/OperandIndexSequence.h" +#include "ir/Index.h" +#include "cpp14/memory.h" +#include "exec/ExecutorBase.h" + +namespace neurun +{ +namespace exec +{ + +class DataflowExecutor : public ExecutorBase +{ +public: + using CodeMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<FunctionSequence>>; + +protected: + virtual void notify(uint32_t finished_job_id); + bool noWaitingJobs(); + +public: + /** + * @brief Constructs a DataflowExecutor object + * + * @param graph Graph object + * @param operand_context (Only for input/output operand data access) + * @param code_map Compiled code map + * @param ranks Operation ranks for ordering execution + */ + DataflowExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map); + + void executeImpl() override; + +protected: + int64_t calculateRank(const std::vector<ir::Element> &operations); + void emplaceToReadyJobs(const uint32_t &id); + +protected: + CodeMap _code_map; + /** + * @brief A vector of finished jobs for current execution + * After a run it has all the jobs of this execution for the next run + */ + std::vector<std::unique_ptr<Job>> _finished_jobs; + /** + * @brief A vector of waiting jobs for current execution + * All the jobs are moved from #_finished_jobs to it when start a run + */ + std::vector<std::unique_ptr<Job>> _waiting_jobs; + /** + * @brief Jobs' output info + * Used for notifying after finishing a job + */ + std::vector<std::list<uint32_t>> _output_info; + std::vector<uint32_t> _initial_input_info; + std::vector<uint32_t> _input_info; + /** + * @brief A collection of jobs that are ready for execution + * Jobs in it are ready to be scheduled. 
+ * Ordered by priority from `_indexed_ranks` + */ + std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs; + + /// @brief Which job runs which op and function. + std::unordered_map<uint32_t, ir::SubgraphIndex> _job_to_op_seq; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__ diff --git a/runtime/neurun/core/src/exec/Execution.cc b/runtime/neurun/core/src/exec/Execution.cc new file mode 100644 index 000000000..bc7bbd160 --- /dev/null +++ b/runtime/neurun/core/src/exec/Execution.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "exec/Execution.h" + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +Execution::Execution(const std::shared_ptr<IExecutor> &executor) : _executor{executor} +{ + _io_desc.inputs.resize(_executor->graph().getInputs().size()); + _io_desc.outputs.resize(_executor->graph().getOutputs().size()); +} + +// TODO Remove default parameter +void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length, + ir::Layout layout) +{ + const auto input_index = graph().getInputs().at(index); + const auto info = graph().operands().at(input_index).info(); + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.inputs.at(index.value()) = + nnfw::cpp14::make_unique<InputDesc>(info, buffer, length, layout); +} + +// TODO Remove default parameter +void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape, + const void *buffer, size_t length, ir::Layout layout) +{ + const ir::OperandInfo info{shape, type}; + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.inputs.at(index.value()) = + nnfw::cpp14::make_unique<InputDesc>(info, buffer, length, layout); +} + +// TODO Remove default parameter +void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout) +{ + const auto output_index = graph().getOutputs().at(index); + const auto info = graph().operands().at(output_index).info(); + + if (length < info.total_size()) + { + throw std::runtime_error{"Too small length"}; + } + + _io_desc.outputs.at(index.value()) = + nnfw::cpp14::make_unique<OutputDesc>(info, buffer, length, layout); +} + +// TODO Remove default parameter +void Execution::setOutput(const ir::IOIndex &index, const ir::TypeInfo &type, + const ir::Shape &shape, void *buffer, size_t length, ir::Layout layout) +{ + const ir::OperandInfo info{shape, type}; + + if (length < info.total_size()) + { + 
throw std::runtime_error{"Too small length"}; + } + + _io_desc.outputs.at(index.value()) = + nnfw::cpp14::make_unique<OutputDesc>(info, buffer, length, layout); +} + +void Execution::setInputLayout(const ir::IOIndex &index, ir::Layout layout) +{ + const auto &input_desc = _io_desc.inputs.at(index.value()); + _io_desc.inputs.at(index.value()) = nnfw::cpp14::make_unique<InputDesc>( + input_desc->info, input_desc->buffer, input_desc->size, layout); +} + +void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout) +{ + const auto &output_desc = _io_desc.outputs.at(index.value()); + _io_desc.outputs.at(index.value()) = nnfw::cpp14::make_unique<OutputDesc>( + output_desc->info, output_desc->buffer, output_desc->size, layout); +} + +void Execution::execute() +{ + VERBOSE(Execution) << "Start execution" << std::endl; + + _executor->execute(_io_desc); + finished = true; + + VERBOSE(Execution) << "Execution finished" << std::endl; +} + +void Execution::startExecute() +{ + VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl; + + _exec_thread = nnfw::cpp14::make_unique<std::thread>(&Execution::execute, this); +} + +void Execution::waitFinish() +{ + VERBOSE(Execution) << "Wait to finish execution" << std::endl; + + _exec_thread->join(); + finished = true; +} + +bool Execution::isFinished(void) const { return finished; } + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ExecutionObservee.cc b/runtime/neurun/core/src/exec/ExecutionObservee.cc new file mode 100644 index 000000000..3b342d703 --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutionObservee.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ExecutionObservee.h" + +namespace neurun +{ +namespace exec +{ + +void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer) +{ + _observers.emplace_back(std::move(observer)); +} + +void ExecutionObservee::notifyModelBegin(IExecutor *executor) +{ + for (auto &o : _observers) + { + o->handleBegin(executor); + } +} + +void ExecutionObservee::notifyModelEnd(IExecutor *executor) +{ + for (auto &o : _observers) + { + o->handleEnd(executor); + } +} + +void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + for (auto &o : _observers) + { + o->handleBegin(executor, op_seq, backend); + } +} + +void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + for (auto &o : _observers) + { + o->handleEnd(executor, op_seq, backend); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ExecutionObservee.h b/runtime/neurun/core/src/exec/ExecutionObservee.h new file mode 100644 index 000000000..dafeef55b --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutionObservee.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_EXECUTION_OBSERVEE_H__ +#define __NEURUN_EXEC_EXECUTION_OBSERVEE_H__ + +#include <list> + +#include "exec/ExecutionObservers.h" + +namespace neurun +{ +namespace exec +{ + +/** + * @brief Class that + * + */ +class ExecutionObservee +{ +public: + /** + * @brief Register an observer + * + * @param observer Observer to be added + */ + void add(std::unique_ptr<IExecutionObserver> observer); + void notifyModelBegin(IExecutor *executor); + void notifyModelEnd(IExecutor *executor); + void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, + const backend::Backend *backend); + void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, + const backend::Backend *backend); + +private: + std::list<std::unique_ptr<IExecutionObserver>> _observers; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_EXECUTION_OBSERVEE__ diff --git a/runtime/neurun/core/src/exec/ExecutionObservers.cc b/runtime/neurun/core/src/exec/ExecutionObservers.cc new file mode 100644 index 000000000..071a9e228 --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutionObservers.cc @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/ExecutionObservers.h" + +#include <string> + +#include "util/logging.h" +#include "ir/operation/Permute.h" +#include "exec/IExecutor.h" +#include "misc/polymorphic_downcast.h" +#include "ir/OpSequence.h" + +namespace neurun +{ + +namespace exec +{ + +void ProfileObserver::handleBegin(neurun::exec::IExecutor *, const ir::OpSequence *, + const neurun::backend::Backend *backend) +{ + _timer = backend->config()->timer(); + if (_timer == nullptr) + throw std::runtime_error("To profile backend timer() method must be implemented"); + _timer->handleBegin(); +} + +void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + _timer->handleEnd(); + const auto timer_res = _timer->getTime(); + + // NOTE This assumes there is just one operation in a op_seq + auto node = op_seq->operations().at(0).node; + auto node_name = node->name(); + VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl; + + // fill ExecTime: + bool is_quantized = exec->graph().operands().at(node->getInputs().at(0)).typeInfo().type() == + ir::DataType::QUANT8_ASYMM; + + uint32_t size = 0; + for (const auto &input : node->getInputs()) + { + size += exec->graph().operands().at(input).info().total_size(); + } + for (const auto &output : node->getOutputs()) + { + size += exec->graph().operands().at(output).info().total_size(); + } + if (node_name == "Permute") + { + auto *permute_node = nnfw::misc::polymorphic_downcast<const ir::operation::Permute *>(node); + assert(permute_node != 
nullptr); + _et->updatePermuteTime(permute_node->param().input_backend_ctx->backend, + permute_node->param().output_backend_ctx->backend, is_quantized, size, + timer_res); + } + else + { + _et->updateOperationExecTime(backend, node_name, is_quantized, size, timer_res); + } +}; + +ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath) + : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder} +{ +} + +ChromeTracingObserver::~ChromeTracingObserver() { _recorder.writeToFile(_ofs); } + +void ChromeTracingObserver::handleBegin(IExecutor *) +{ + _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"}); +} + +void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + std::string backend_id = backend->config()->id(); + _collector.onEvent( + EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, subgraphTag(op_seq)}); +} + +void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq, + const backend::Backend *backend) +{ + std::string backend_id = backend->config()->id(); + _collector.onEvent( + EventCollector::Event{EventCollector::Edge::END, backend_id, subgraphTag(op_seq)}); +} + +void ChromeTracingObserver::handleEnd(IExecutor *) +{ + _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"}); +} + +std::string ChromeTracingObserver::subgraphTag(const ir::OpSequence *op_seq) +{ + if (op_seq->size() == 0) + return "Empty OpSequence"; + + auto first_op = op_seq->operations().at(0); + std::string tag = "$" + std::to_string(first_op.index.value()); + tag += " " + first_op.node->name(); + if (op_seq->size() > 1) + { + tag += " (+" + std::to_string(op_seq->size() - 1) + ")"; + } + return tag; +} + +} // namespace exec + +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ExecutorBase.cc b/runtime/neurun/core/src/exec/ExecutorBase.cc new file mode 100644 index 
000000000..9692c2ba7 --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutorBase.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ExecutorBase.h" +#include "util/logging.h" +namespace neurun +{ +namespace exec +{ + +ExecutorBase::ExecutorBase(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs) + : _graph{graph}, _operand_context{operand_context}, _tensor_mgrs{std::move(tensor_mgrs)}, + _mutex() +{ + // DO NOTHING +} + +std::unique_ptr<ISource> ExecutorBase::source(const ir::IOIndex &index, const ir::TypeInfo &type, + const void *buffer, size_t length, + ir::Layout io_layout) +{ + using ir::DataType; + switch (type.type()) + { + case DataType::FLOAT32: + return source<float>(index, buffer, length, io_layout); + case DataType::INT32: + return source<int32_t>(index, buffer, length, io_layout); + case DataType::UINT32: + return source<uint32_t>(index, buffer, length, io_layout); + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + case DataType::UINT8: + return source<uint8_t>(index, buffer, length, io_layout); + case DataType::QUANT8_SYMM: + return source<int8_t>(index, buffer, length, io_layout); + default: + throw std::runtime_error("Not supported yet"); + } +} + +std::unique_ptr<ISink> ExecutorBase::sink(const ir::IOIndex &index, const 
ir::TypeInfo &type, + void *buffer, size_t length, ir::Layout io_layout) +{ + using ir::DataType; + switch (type.type()) + { + case DataType::FLOAT32: + return sink<float>(index, buffer, length, io_layout); + case DataType::INT32: + return sink<int32_t>(index, buffer, length, io_layout); + case DataType::UINT32: + return sink<uint32_t>(index, buffer, length, io_layout); + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + case DataType::UINT8: + return sink<uint8_t>(index, buffer, length, io_layout); + case DataType::QUANT8_SYMM: + return sink<int8_t>(index, buffer, length, io_layout); + default: + throw std::runtime_error("Not supported yet"); + } +} + +void ExecutorBase::execute(const IODescription &desc) +{ + // For thread-safe, use mutex + // TODO: if all used backends on this executor are thread-safe, + // do not need to use mutex (otherwise, use mutex) + std::lock_guard<std::mutex> lock(_mutex); + + std::vector<std::unique_ptr<ISource>> sources{_graph.getInputs().size()}; + std::vector<std::unique_ptr<ISink>> sinks{_graph.getOutputs().size()}; + + // Set input(s) + for (uint32_t n = 0; n < _graph.getInputs().size(); ++n) + { + ir::IOIndex input_index{n}; + ir::OperandIndex index{_graph.getInputs().at(input_index)}; + + if (desc.inputs.at(n) == nullptr) + { + // Optional input + continue; + } + + const auto operand_li = _graph.getLowerInfo()->operand.at(index).get(); + if (operand_li->def_factors().empty()) + { + // This input is not used (i.e. constant, EX. 
reshape's axis) + continue; + } + + const auto &input = *desc.inputs.at(n); + sources.at(n) = + source(input_index, input.info.typeInfo(), input.buffer, input.size, input.layout); + + auto setter = [&](::neurun::backend::operand::ITensor &tensor) { sources.at(n)->push(tensor); }; + + auto object = _operand_context->at(index); + + object->access(setter); + } + + executeImpl(); + + // Get output(s) + for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n) + { + ir::IOIndex output_index{n}; + // Optional output + if (desc.outputs.at(n) == nullptr) + { + continue; + } + const auto &output = *desc.outputs.at(n); + sinks.at(n) = + sink(output_index, output.info.typeInfo(), output.buffer, output.size, output.layout); + + auto getter = [&](::neurun::backend::operand::ITensor &tensor) { sinks.at(n)->pull(tensor); }; + + ir::OperandIndex index{_graph.getOutputs().at(output_index)}; + auto object = _operand_context->at(index); + + object->access(getter); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ExecutorBase.h b/runtime/neurun/core/src/exec/ExecutorBase.h new file mode 100644 index 000000000..a93e036a5 --- /dev/null +++ b/runtime/neurun/core/src/exec/ExecutorBase.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_EXECUTOR_BASE_H__ +#define __NEURUN_EXEC_EXECUTOR_BASE_H__ + +#include <mutex> + +#include "Source.h" +#include "exec/ExecutionObservers.h" +#include "Sink.h" +#include "exec/IExecutor.h" +#include "ir/Graph.h" +#include "ir/LowerInfoMap.h" +#include "backend/IConfig.h" +#include "backend/Backend.h" +#include "compiler/OperandContext.h" +#include "backend/ExecTime.h" +#include "exec/IFunction.h" +#include "backend/ITensorManager.h" +#include "exec/ExecutionObservee.h" +#include <list> + +namespace neurun +{ +namespace exec +{ + +class ExecutorBase : public IExecutor +{ +public: + ExecutorBase(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs); + + virtual ~ExecutorBase() = default; + + const ir::Graph &graph() final { return _graph; } + + void execute(const IODescription &desc) final; + + // Used only in Dataflow and Parallel Executors + void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final + { + _indexed_ranks = std::move(ranks); + }; + + virtual void executeImpl(void) = 0; + + void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); }; + +private: + std::unique_ptr<ISource> source(const ir::IOIndex &index, const ir::TypeInfo &type, + const void *buffer, size_t length, ir::Layout io_layout); + std::unique_ptr<ISink> sink(const ir::IOIndex &index, const ir::TypeInfo &type, void *buffer, + size_t length, ir::Layout io_layout); + + template <typename T> + std::unique_ptr<ISource> source(const ir::IOIndex &index, const void *buffer, size_t length, + ir::Layout io_layout) + { + const auto operand_index = _graph.getInputs().at(index); + const auto &operand = _graph.operands().at(operand_index); + + const auto tensor = _operand_context->at(operand_index); + const auto tensor_layout = tensor->layout(); + + if (((io_layout == ir::Layout::NHWC) && (tensor_layout == ir::Layout::NCHW)) || 
+ ((io_layout == ir::Layout::NCHW) && (tensor_layout == ir::Layout::NHWC))) + { + return nnfw::cpp14::make_unique<PermutateSource<T>>(buffer, length, operand.shape(), + io_layout); + } + // TODO Change this to return error + assert(io_layout != ir::Layout::UNKNOWN || + (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NCHW)); + + return nnfw::cpp14::make_unique<CopySource<T>>(buffer, length, operand.shape()); + } + + template <typename T> + std::unique_ptr<ISink> sink(const ir::IOIndex &index, void *buffer, size_t length, + ir::Layout io_layout) + { + const auto operand_index = _graph.getOutputs().at(index); + const auto &operand = _graph.operands().at(operand_index); + const auto tensor = _operand_context->at(operand_index); + const auto tensor_layout = tensor->layout(); + + if (((tensor_layout == ir::Layout::NCHW) && (io_layout == ir::Layout::NHWC)) || + ((tensor_layout == ir::Layout::NHWC) && (io_layout == ir::Layout::NCHW))) + { + return nnfw::cpp14::make_unique<PermutateSink<T>>(buffer, length, operand.shape(), io_layout); + } + // TODO Change this to return error + assert(io_layout != ir::Layout::UNKNOWN || + (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NCHW)); + + return nnfw::cpp14::make_unique<CopySink<T>>(buffer, length, operand.shape()); + } + +protected: + ExecutionObservee _subject; + std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; + const ir::Graph &_graph; + std::shared_ptr<compiler::OperandContext> _operand_context; + std::unique_ptr<backend::TensorManagerSet> _tensor_mgrs; + std::mutex _mutex; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_EXECUTOR_BASE_H__ diff --git a/runtime/neurun/core/src/exec/FunctionSequence.cc b/runtime/neurun/core/src/exec/FunctionSequence.cc new file mode 100644 index 000000000..00214fcfa --- /dev/null +++ b/runtime/neurun/core/src/exec/FunctionSequence.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "FunctionSequence.h" + +namespace neurun +{ +namespace exec +{ + +void FunctionSequence::run() +{ + for (const auto &function : _functions) + { + function->run(); + } +} + +void FunctionSequence::runSync() +{ + for (const auto &function : _functions) + { + function->runSync(); + } +} + +void FunctionSequence::prepare() +{ + for (const auto &function : _functions) + { + function->prepare(); + } +} + +void FunctionSequence::append(std::unique_ptr<IFunction> &&function) +{ + _functions.push_back(std::move(function)); +} + +void FunctionSequence::iterate(const std::function<void(IFunction &)> &fn) +{ + for (const auto &func : _functions) + { + fn(*func); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/FunctionSequence.h b/runtime/neurun/core/src/exec/FunctionSequence.h new file mode 100644 index 000000000..2ba5c0b08 --- /dev/null +++ b/runtime/neurun/core/src/exec/FunctionSequence.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ +#define __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ + +#include <memory> +#include <vector> +#include <functional> + +#include "exec/IFunction.h" + +namespace neurun +{ +namespace exec +{ + +class FunctionSequence : public IFunction +{ +public: + virtual ~FunctionSequence() = default; + + void run() override; + void runSync() override; + void prepare() override; + + /** + * @brief Appends an IFunction object to the function sequence + * + * @param function IFunction object to be appended + */ + void append(std::unique_ptr<IFunction> &&function); + + void iterate(const std::function<void(IFunction &)> &fn); + +private: + std::vector<std::unique_ptr<IFunction>> _functions; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_FUNCTION_SEQUENCE_H__ diff --git a/runtime/neurun/core/src/exec/Job.cc b/runtime/neurun/core/src/exec/Job.cc new file mode 100644 index 000000000..ba02daf30 --- /dev/null +++ b/runtime/neurun/core/src/exec/Job.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Job.h" + +#include <cassert> + +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +Job::Job(uint32_t index, IFunction *fn) : _index{index}, _fn{fn} {} + +void Job::run() { _fn->run(); } + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/Job.h b/runtime/neurun/core/src/exec/Job.h new file mode 100644 index 000000000..1516b9281 --- /dev/null +++ b/runtime/neurun/core/src/exec/Job.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_EXEC_JOB_H__ +#define __NEURUN_EXEC_JOB_H__ + +#include <unordered_set> + +#include "exec/IFunction.h" +#include "ir/Index.h" +#include "ir/OperandIndexSequence.h" +#include "backend/Backend.h" + +namespace neurun +{ +namespace exec +{ + +class Job +{ +public: + /** + * @brief Constructs a Job object + * + * @param index Operation index for this job + * @param fn compiled code to run this job + * @param inputs Input operand list + * @param outputs Output operand list + */ + Job(uint32_t index, IFunction *fn); + /** + * @brief Execute the compiled code + */ + void run(); + /** + * @brief Return job index + * + * @return Job index + */ + uint32_t index() const { return _index; } + /** + * @brief Return the function to be executed + * + * @return Pointer of the function + */ + IFunction *fn() { return _fn; } + +private: + uint32_t _index; + IFunction *_fn; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_JOB_H__ diff --git a/runtime/neurun/core/src/exec/LinearExecutor.cc b/runtime/neurun/core/src/exec/LinearExecutor.cc new file mode 100644 index 000000000..d41dba880 --- /dev/null +++ b/runtime/neurun/core/src/exec/LinearExecutor.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "LinearExecutor.h" + +namespace neurun +{ +namespace exec +{ + +void LinearExecutor::executeImpl() +{ + _subject.notifyModelBegin(this); + for (auto &&code : _code) + { + const auto op_seq = code.elem.op_seq; + const auto backend = code.elem.lower_info->backend(); + _subject.notifyJobBegin(this, op_seq, backend); + code.fn->run(); + _subject.notifyJobEnd(this, op_seq, backend); + } + _subject.notifyModelEnd(this); +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/LinearExecutor.h b/runtime/neurun/core/src/exec/LinearExecutor.h new file mode 100644 index 000000000..baf063a12 --- /dev/null +++ b/runtime/neurun/core/src/exec/LinearExecutor.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file LinearExecutor.h + * @brief This file contains LinearExecutor class to define and run execution phase + */ + +#ifndef __NEURUN_EXEC_EXECUTOR_H_ +#define __NEURUN_EXEC_EXECUTOR_H_ + +#include "ExecutorBase.h" +#include "compiler/Linear.h" +#include "exec/FunctionSequence.h" +#include "compiler/CodeWithInfo.h" + +namespace neurun +{ +namespace exec +{ + +/** + * @brief Class to handle execution phase. 
Simple run the sequence of operations that is sorted in + * topological order + */ +class LinearExecutor final : public ExecutorBase +{ +public: + /** + * @brief Construct a new LinearExecutor object + * @param[in] plan Execution plan generated by compiled result + */ + LinearExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, + std::vector<compiler::CodeWithInfo> &&code) + : ExecutorBase{graph, operand_context, std::move(tensor_mgrs)}, _code{std::move(code)} + { + } + +public: + void executeImpl(void) override; + +private: + std::vector<compiler::CodeWithInfo> _code; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_EXECUTOR_H_ diff --git a/runtime/neurun/core/src/exec/ParallelExecutor.cc b/runtime/neurun/core/src/exec/ParallelExecutor.cc new file mode 100644 index 000000000..c73c353d3 --- /dev/null +++ b/runtime/neurun/core/src/exec/ParallelExecutor.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ParallelExecutor.h" + +#include <cassert> + +#include "util/logging.h" +#include "exec/IFunction.h" + +namespace neurun +{ +namespace exec +{ + +class HookFunction : public IFunction +{ +public: + HookFunction(IFunction *fn, const std::function<void()> &setup, + const std::function<void()> &teardown) + : _fn{fn}, _setup{setup}, _teardown{teardown} + { + } + +public: + void run() override + { + _setup(); + _fn->run(); + _teardown(); + } + void runSync() override { throw("runSync is needed just for profiling in Dataflow executor"); } + +private: + IFunction *_fn; + std::function<void()> _setup; + std::function<void()> _teardown; +}; + +void ParallelExecutor::notify(uint32_t finished_job_id) +{ + std::unique_lock<std::mutex> lock{_mu_jobs}; + + DataflowExecutor::notify(finished_job_id); + + lock.unlock(); + _cv_jobs.notify_all(); +} + +ParallelExecutor::ParallelExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, + CodeMap &&code_map) + : DataflowExecutor{graph, operand_context, std::move(tensor_mgrs), std::move(code_map)} +{ + VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; +} + +void ParallelExecutor::executeImpl() +{ + // Init scheduler + // TODO Consider to have distinct backend set in LowerInfoMap + ir::BackendSet backends; + for (auto &itr : _graph.getLowerInfo()->operation) + { + backends.add(itr.second->backend()); + } + _scheduler = nnfw::cpp14::make_unique<ParallelScheduler>(backends); + + assert(noWaitingJobs()); + + // Execution setup + _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs + + for (uint32_t i = 0; i < _waiting_jobs.size(); ++i) + { + VERBOSE(ParallelExecutor) << i << ": " << _input_info[i] << std::endl; + if (_input_info[i] == 0) + { + emplaceToReadyJobs(i); + } + } + assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs + + VERBOSE(ParallelExecutor) 
<< "INITIAL JOBS : " << _ready_jobs.size() << std::endl; + + _subject.notifyModelBegin(this); + while (true) + { + std::unique_lock<std::mutex> lock{_mu_jobs}; + + if (_ready_jobs.empty()) + { + _cv_jobs.wait(lock, [this] { return !_ready_jobs.empty() || noWaitingJobs(); }); + // Check finish condition + if (_ready_jobs.empty() && noWaitingJobs()) + { + break; + } + } + + auto job = std::move(_ready_jobs.begin()->second); + _ready_jobs.erase(_ready_jobs.begin()); + + lock.unlock(); + + VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl; + + auto job_index = job->index(); + auto subgraph_index = _job_to_op_seq[job_index]; + auto op_seq = &_graph.subgraphs()->at(subgraph_index); + auto backend = _graph.getLowerInfo()->operation.at(subgraph_index)->backend(); + auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); }; + auto teardown = [&, job_index, op_seq, backend]() { + _subject.notifyJobEnd(this, op_seq, backend); + notify(job_index); + }; + + _scheduler->assign(nnfw::cpp14::make_unique<HookFunction>(job->fn(), setup, teardown), backend); + _finished_jobs[job_index] = std::move(job); + } + + assert(noWaitingJobs()); + + // Wait for all the jobs done + _scheduler->finish(); + _subject.notifyModelEnd(this); + + // Reset input info for the next execution + _input_info = _initial_input_info; +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ParallelExecutor.h b/runtime/neurun/core/src/exec/ParallelExecutor.h new file mode 100644 index 000000000..54377fd9e --- /dev/null +++ b/runtime/neurun/core/src/exec/ParallelExecutor.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_PARALLEL_EXECUTOR_H__ +#define __NEURUN_EXEC_PARALLEL_EXECUTOR_H__ + +#include <list> +#include <queue> +#include <unordered_map> + +#include "FunctionSequence.h" +#include "Job.h" +#include "ir/OperandIndexSequence.h" +#include "ir/Index.h" +#include "cpp14/memory.h" +#include "exec/DataflowExecutor.h" +#include "ParallelScheduler.h" + +namespace neurun +{ +namespace exec +{ + +/** + * @brief Class to execute Graph in parallel + */ +class ParallelExecutor : public DataflowExecutor +{ +protected: + void notify(uint32_t finished_job_id) override; + +public: + /** + * @brief Constructs a ParallelExecutor object + * + * @param graph Graph object + * @param operand_context (Only for input/output operand data access) + * @param code_map Compiled code map + * @param ranks Operation ranks for ordering execution + */ + ParallelExecutor(const ir::Graph &graph, + const std::shared_ptr<compiler::OperandContext> &operand_context, + std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map); + + void executeImpl() override; + +private: + std::condition_variable _cv_jobs; + std::mutex _mu_jobs; + std::unique_ptr<ParallelScheduler> _scheduler; +}; + +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_PARALLEL_EXECUTOR_H__ diff --git a/runtime/neurun/core/src/exec/ParallelScheduler.cc b/runtime/neurun/core/src/exec/ParallelScheduler.cc new file mode 100644 index 000000000..5f9e9e013 --- /dev/null +++ b/runtime/neurun/core/src/exec/ParallelScheduler.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ParallelScheduler.h" + +#include <cassert> + +#include "cpp14/memory.h" +#include "util/logging.h" + +namespace neurun +{ +namespace exec +{ + +ParallelScheduler::ParallelScheduler(const ir::BackendSet &backends) +{ + assert(!backends.empty()); + + for (auto backend : backends) + { + _thread_pools[backend] = nnfw::cpp14::make_unique<ThreadPool>(); + } +} + +void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend) +{ + assert(!_thread_pools.empty()); + + _thread_pools.at(backend)->enqueue(std::move(fn)); +} + +void ParallelScheduler::finish() +{ + for (auto &itr : _thread_pools) + { + itr.second->finish(); + } +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ParallelScheduler.h b/runtime/neurun/core/src/exec/ParallelScheduler.h new file mode 100644 index 000000000..af1103750 --- /dev/null +++ b/runtime/neurun/core/src/exec/ParallelScheduler.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/**
 * @brief Dispatches functions to per-backend worker thread pools
 */
class ParallelScheduler
{
public:
  /**
   * @brief Constructs ParallelScheduler object
   *
   * @param backends Backend set
   */
  // NOTE(review): single-argument constructor — consider `explicit` to avoid
  // accidental implicit conversions
  ParallelScheduler(const ir::BackendSet &backends);
  /**
   * @brief Assign a task to the given backend
   *
   * @param[in] fn Function to be assigned
   * @param[in] backend Target backend
   */
  void assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend);
  /**
   * @brief Block until all jobs are finished
   */
  void finish();

private:
  // One worker thread pool per backend; keys are non-owning backend pointers
  std::unordered_map<const backend::Backend *, std::unique_ptr<ThreadPool>> _thread_pools;
};
/**
 * @brief Interface for copying model output data out of a backend tensor
 *        into a user-supplied buffer
 */
struct ISink
{
  virtual ~ISink() = default;

  /// @brief Copy the contents of @p tensor into the sink's output buffer
  virtual void pull(::neurun::backend::operand::ITensor &tensor) const = 0;
};

// Create second level inheritance: the first level is used as a reference type in use-case places
template <typename T> class ITemplSink : public ISink
{
public:
  /**
   * @brief Construct a sink over a raw, caller-owned output buffer
   *
   * @param[in] output_buffer Destination buffer (not owned)
   * @param[in] output_size Destination size in bytes
   * @param[in] shape Logical shape of the output operand
   * @param[in] copy true for a plain layout-preserving copy,
   *                 false for NHWC<->NCHW permutation (rank-4 only)
   * @param[in] io_layout Layout of the user-facing I/O buffer
   */
  ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
             const bool copy, ir::Layout io_layout)
      : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
        _shape{shape}, _copy{copy}, _io_layout{io_layout}
  {
  }

protected:
  // Copy the tensor into _output_buffer, honoring padding via calcOffset and
  // performing NHWC<->NCHW permutation for rank-4 tensors unless _copy is set.
  void pullUnif(neurun::backend::operand::ITensor &tensor) const
  {
    // Either the layouts genuinely differ (a permutation was requested)
    // or this is a plain copy.
    assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
            (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
           _copy);
    auto input_buffer = tensor.buffer();
    auto rank = _shape.rank();

    // Fast path: an unpadded tensor of low enough rank is one contiguous memcpy.
    // Note the bool->int promotion: threshold is rank < 4 when permuting,
    // rank < 5 (i.e. rank-4 included) when _copy is true.
    if (!tensor.has_padding() && rank < 4 + _copy)
    {
      memcpy(_output_buffer, input_buffer, _output_size);
      return;
    }

    switch (rank)
    {
      case 0:
      case 1:
      {
        // Rank 0/1 cannot be padded mid-row — single contiguous copy.
        memcpy(_output_buffer, input_buffer, _output_size);
        break;
      }
      case 2:
      {
        // Copy row by row; calcOffset skips any per-row padding.
        // NOTE(review): input arithmetic is on the byte-addressed tensor
        // buffer while _output_buffer arithmetic is element-wise — this
        // assumes calcOffset returns a byte offset; confirm against ITensor.
        const int32_t copy_len = _shape.dim(1);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          neurun::util::Coordinates coords{i, 0};
          memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
                 copy_len * sizeof(T));
        }
        break;
      }
      case 3:
      {
        // Copy innermost dimension runs, one (i, j) row at a time.
        const int32_t dim1 = _shape.dim(1);
        const int32_t dim2 = _shape.dim(2);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          for (auto j = 0; j < _shape.dim(1); ++j)
          {
            neurun::util::Coordinates coords{i, j, 0};
            memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
                   input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
          }
        }
        break;
      }
      case 4:
      {
        if (_copy)
        {
          // Layout-preserving copy of a (possibly padded) rank-4 tensor.
          const int32_t dim1 = _shape.dim(1);
          const int32_t dim2 = _shape.dim(2);
          const int32_t dim3 = _shape.dim(3);

          for (auto i = 0; i < _shape.dim(0); ++i)
          {
            for (auto j = 0; j < _shape.dim(1); ++j)
            {
              for (auto k = 0; k < _shape.dim(2); ++k)
              {
                neurun::util::Coordinates coords{i, j, k, 0};
                memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
                       input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
              }
            }
          }
        }
        else
        {
          // Permute element-wise between the tensor's layout and _io_layout.
          const auto shape = _shape.asFeature(_io_layout);

          if (_io_layout == ir::Layout::NHWC)
          {
            // Tensor is NCHW (per the assert above); output wants NHWC.
            const util::feature::nchw::Reader<T> from(&tensor);
            util::feature::nhwc::View<T> into(shape, _output_buffer, _output_size);
            ::nnfw::misc::feature::iterate(shape)
                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                     const auto value = from.at(batch, ch, row, col);
                     into.at(batch, row, col, ch) = value;
                   };
          }
          else if (_io_layout == ir::Layout::NCHW)
          {
            // Tensor is NHWC; output wants NCHW.
            const util::feature::nhwc::Reader<T> from(&tensor);
            util::feature::nchw::View<T> into(shape, _output_buffer, _output_size);
            ::nnfw::misc::feature::iterate(shape)
                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                     const auto value = from.at(batch, row, col, ch);
                     into.at(batch, ch, row, col) = value;
                   };
          }
          else
          {
            throw std::runtime_error("Wrong Layout");
          }
        }
        break;
      }
      default:
        // Ranks above 4 are not supported yet.
        throw std::runtime_error("NYI");
        break;
    }
  }

private:
  T *_output_buffer;          // Destination (not owned), typed element pointer
  const size_t _output_size;  // Destination size in bytes
  const ir::Shape _shape;     // Logical output shape
  const bool _copy;           // true: plain copy; false: NHWC<->NCHW permute
  const ir::Layout _io_layout;
};

/// @brief Sink that permutes NHWC<->NCHW while pulling (rank-4 feature maps)
template <typename T> class PermutateSink final : public ITemplSink<T>
{
public:
  PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
                ir::Layout io_layout)
      : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout)
  {
  }

public:
  void pull(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSink<T>::pullUnif(tensor);
  }
};

// Only supports NHWC format front-end(NNAPI) now
/// @brief Sink that copies without any layout permutation
template <typename T> class CopySink final : public ITemplSink<T>
{
public:
  CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
           ir::Layout io_layout = ir::Layout::UNKNOWN)
      : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout)
  {
  }

public:
  void pull(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSink<T>::pullUnif(tensor);
  }
};
/**
 * @brief Interface for copying user-supplied input data into a backend tensor
 *        (mirror of ISink in Sink.h)
 */
struct ISource
{
  virtual ~ISource() = default;

  /// @brief Copy the source's input buffer into @p tensor
  virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;
};

// Create second level inheritance: the first level is used as a reference type in use-case places
template <typename T> class ITemplSource : public ISource
{
public:
  /**
   * @brief Construct a source over a raw, caller-owned input buffer
   *
   * @param[in] input_buffer Source buffer (not owned)
   * @param[in] input_size Source size in bytes
   * @param[in] shape Logical shape of the input operand
   * @param[in] copy true for a plain layout-preserving copy,
   *                 false for NHWC<->NCHW permutation (rank-4 only)
   * @param[in] io_layout Layout of the user-facing I/O buffer
   */
  ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
               const bool copy, ir::Layout io_layout)
      : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
        _shape{shape}, _copy(copy), _io_layout{io_layout}
  {
  }

  virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;

protected:
  // Copy _input_buffer into the tensor, honoring padding via calcOffset and
  // performing NHWC<->NCHW permutation for rank-4 tensors unless _copy is set.
  void pushUnif(neurun::backend::operand::ITensor &tensor) const
  {
    // Either the layouts genuinely differ (a permutation was requested)
    // or this is a plain copy.
    assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
            (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
           _copy);
    auto output_buffer = tensor.buffer();
    auto rank = _shape.rank();

    // Fast path: an unpadded tensor of low enough rank is one contiguous memcpy.
    // Note the bool->int promotion: threshold is rank < 4 when permuting,
    // rank < 5 (i.e. rank-4 included) when _copy is true.
    if (!tensor.has_padding() && rank < 4 + _copy)
    {
      memcpy(output_buffer, _input_buffer, _input_size);
      return;
    }

    switch (rank)
    {
      case 0:
      case 1:
      {
        memcpy(output_buffer, _input_buffer, _input_size);
        break;
      }
      case 2:
      {
        // Copy row by row; calcOffset skips any per-row padding.
        // NOTE(review): output arithmetic is on the byte-addressed tensor
        // buffer while _input_buffer arithmetic is element-wise — this
        // assumes calcOffset returns a byte offset; confirm against ITensor.
        const int32_t copy_len = _shape.dim(1);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          neurun::util::Coordinates coords{i, 0};
          memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
                 copy_len * sizeof(T));
        }
        break;
      }
      case 3:
      {
        // Copy innermost dimension runs, one (i, j) row at a time.
        const int32_t dim1 = _shape.dim(1);
        const int32_t dim2 = _shape.dim(2);

        for (auto i = 0; i < _shape.dim(0); ++i)
        {
          for (auto j = 0; j < _shape.dim(1); ++j)
          {
            neurun::util::Coordinates coords{i, j, 0};
            memcpy(output_buffer + tensor.calcOffset(coords),
                   _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
          }
        }
        break;
      }
      case 4:
      {
        if (_copy)
        {
          // Layout-preserving copy into a (possibly padded) rank-4 tensor.
          const int32_t dim1 = _shape.dim(1);
          const int32_t dim2 = _shape.dim(2);
          const int32_t dim3 = _shape.dim(3);
          for (auto i = 0; i < _shape.dim(0); ++i)
          {
            for (auto j = 0; j < _shape.dim(1); ++j)
            {
              for (auto k = 0; k < _shape.dim(2); ++k)
              {
                neurun::util::Coordinates coords{i, j, k, 0};
                memcpy(output_buffer + tensor.calcOffset(coords),
                       _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
                       dim3 * sizeof(T));
              }
            }
          }
        }
        else
        {
          // Permute element-wise from _io_layout into the tensor's layout.
          const auto shape = _shape.asFeature(_io_layout);

          if (_io_layout == ir::Layout::NCHW)
          {
            // Input buffer is NCHW; tensor is NHWC (per the assert above).
            const util::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size);
            util::feature::nhwc::View<T> into(&tensor);
            ::nnfw::misc::feature::iterate(shape)
                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                     const auto value = from.at(batch, ch, row, col);
                     into.at(batch, row, col, ch) = value;
                   };
          }
          else if (_io_layout == ir::Layout::NHWC)
          {
            // Input buffer is NHWC; tensor is NCHW.
            const util::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size);
            util::feature::nchw::View<T> into(&tensor);
            ::nnfw::misc::feature::iterate(shape)
                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                     const auto value = from.at(batch, row, col, ch);
                     into.at(batch, ch, row, col) = value;
                   };
          }
          else
          {
            throw std::runtime_error("Wrong Layout");
          }
        }

        break;
      }
      default:
        // Ranks above 4 are not supported yet.
        throw std::runtime_error("NYI");
        break;
    }
  }

private:
  const T *_input_buffer;     // Source (not owned), typed element pointer
  const size_t _input_size;   // Source size in bytes
  const ir::Shape _shape;     // Logical input shape
  const bool _copy;           // true: plain copy; false: NHWC<->NCHW permute
  const ir::Layout _io_layout;
};

/// @brief Source that permutes NHWC<->NCHW while pushing (rank-4 feature maps)
template <typename T> class PermutateSource final : public ITemplSource<T>
{
public:
  PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
                  ir::Layout io_layout)
      : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout)
  {
  }

public:
  void push(neurun::backend::operand::ITensor &tensor) const override
  {
    // do NHWC_TO_NCHW or NCHW_TO_NHWC permutation
    ITemplSource<T>::pushUnif(tensor);
  }
};

/// @brief Source that copies without any layout permutation
template <typename T> class CopySource final : public ITemplSource<T>
{
public:
  CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
             ir::Layout io_layout = ir::Layout::UNKNOWN)
      : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout)
  {
  }

public:
  void push(neurun::backend::operand::ITensor &tensor) const override
  {
    ITemplSource<T>::pushUnif(tensor);
  }
};
+ */ + +#include "ThreadPool.h" + +#include <cassert> + +namespace neurun +{ +namespace exec +{ + +ThreadPool::ThreadPool(uint32_t num_threads) +{ + assert(num_threads >= 1); + + for (uint32_t i = 0; i < num_threads; i++) + { + _threads.emplace_back(std::ref(_worker)); + } +} + +ThreadPool::~ThreadPool() +{ + if (!_threads.empty()) + { + _worker.terminate(); + join(); + } +} + +void ThreadPool::enqueue(std::unique_ptr<IFunction> &&fn) { _worker.enqueue(std::move(fn)); } + +uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); } + +void ThreadPool::join() +{ + for (auto &thread : _threads) + { + thread.join(); + } + _threads.clear(); +} + +void ThreadPool::finish() +{ + _worker.finish(); + join(); +} + +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/ThreadPool.h b/runtime/neurun/core/src/exec/ThreadPool.h new file mode 100644 index 000000000..a1a027617 --- /dev/null +++ b/runtime/neurun/core/src/exec/ThreadPool.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * @brief Fixed-size pool of worker threads sharing one WorkQueue
 */
class ThreadPool
{
public:
  /**
   * @brief Construct ThreadPool object
   *
   * @param num_threads Number of threads
   */
  ThreadPool(uint32_t num_threads = 1);
  /**
   * @brief Destroy ThreadPool object
   *        (force-terminates and joins workers if still running)
   */
  ~ThreadPool();
  /**
   * @brief Enqueue a function
   *
   * @param fn A function to be queued
   */
  void enqueue(std::unique_ptr<IFunction> &&fn);
  /**
   * @brief Get number of jobs in worker's queue
   *
   * @return Number of jobs
   */
  uint32_t numJobsInQueue();

  /**
   * @brief Block until all jobs are finished
   */
  void finish();

private:
  // Join every worker thread and clear the thread list
  void join();

private:
  WorkQueue _worker;                 // Shared job queue; threads run its operator()
  std::vector<std::thread> _threads; // Worker threads (joined in finish()/dtor)
};
// Destructor: flag force-termination so any worker still blocked in
// operator() wakes up and returns. Threads are joined by the owner
// (ThreadPool), not here.
WorkQueue::~WorkQueue()
{
  {
    std::unique_lock<std::mutex> lock(_mu);
    _state = State::FORCE_FINISHING;
  }
  // Notify outside the lock so woken threads can acquire it immediately
  _cv.notify_all();
}

// Worker-thread entry point: loop forever, popping and running one job at a
// time, until terminate() (immediate stop) or finish() + empty queue.
void WorkQueue::operator()()
{
  while (true)
  {
    std::unique_ptr<IFunction> fn = nullptr;

    {
      std::unique_lock<std::mutex> lock{_mu};
      // Wake on: force-finish, finish requested, or a job available while online
      _cv.wait(lock, [this] {
        return (_state == State::FORCE_FINISHING) || (_state == State::FINISHING) ||
               (_state == State::ONLINE && !_functions.empty());
      });

      if (_state == State::FORCE_FINISHING)
      {
        // terminate() promises no jobs were left pending
        assert(_functions.empty() && "Terminating with unfinished jobs");
        return;
      }
      else if (_state == State::FINISHING && _functions.empty())
      {
        // Graceful shutdown: queue drained, we are done
        return;
      }
      else
      {
        // Either still online, or finishing with jobs left — take one
        assert(((_state == State::FINISHING) || (_state == State::ONLINE)) && !_functions.empty());
        fn = std::move(_functions.front());
        _functions.pop();
      }
    }

    // Run the job outside the lock so other workers can dequeue concurrently
    assert(fn);
    fn->run();
  }
}

// Add a job; wakes exactly one waiting worker.
void WorkQueue::enqueue(std::unique_ptr<IFunction> &&fn)
{
  {
    std::unique_lock<std::mutex> lock{_mu};
    _functions.emplace(std::move(fn));
  }
  _cv.notify_one();
}

// Request immediate shutdown: workers exit without draining the queue
// (asserts the queue is already empty in debug builds).
void WorkQueue::terminate()
{
  {
    std::unique_lock<std::mutex> lock{_mu};
    _state = State::FORCE_FINISHING;
  }
  _cv.notify_all();
}

// Request graceful shutdown: workers keep running jobs until the queue is
// empty, then exit.
void WorkQueue::finish()
{
  {
    std::unique_lock<std::mutex> lock{_mu};
    _state = State::FINISHING;
  }
  _cv.notify_all();
}

// Snapshot of the pending-job count (jobs currently running are not counted).
uint32_t WorkQueue::numJobsInQueue()
{
  std::unique_lock<std::mutex> lock{_mu};
  return _functions.size();
}
/**
 * @brief Thread-safe FIFO of jobs shared by worker threads
 *        (each worker runs operator() as its thread entry)
 */
class WorkQueue
{
public:
  // Lifecycle of the queue as seen by workers
  enum class State
  {
    ONLINE,         // Accepting and running jobs
    FINISHING,      // Drain remaining jobs, then workers exit
    FORCE_FINISHING // Workers exit immediately (queue expected empty)
  };

public:
  /**
   * @brief Create WorkQueue object
   */
  WorkQueue() = default;
  /**
   * @brief Destroy WorkQueue object
   */
  ~WorkQueue();
  /**
   * @brief Thread entry function
   */
  void operator()();
  /**
   * @brief Push the given Task to the job queue
   *
   * @param fn Function to be executed(a job)
   */
  void enqueue(std::unique_ptr<IFunction> &&fn);
  /**
   * @brief Flag as terminating so all the worker threads can terminate
   */
  void terminate();
  /**
   * @brief Flag as finishing so worker threads drain the remaining jobs
   *        and then terminate
   */
  void finish();
  /**
   * @brief Get the number of pending jobs. Even if this returns 0, WorkQueue
   *        threads may be still running
   *
   * @return Number of jobs in the queue
   */
  uint32_t numJobsInQueue();

private:
  State _state{State::ONLINE};                       // Guarded by _mu
  std::queue<std::unique_ptr<IFunction>> _functions; // Pending jobs, guarded by _mu
  std::mutex _mu;
  std::condition_variable _cv; // Signaled on enqueue and state changes
};
/**
 * @brief Interface for writable data area
 */
// NOTE(review): no virtual destructor declared here — relies on ir::Data
// providing one; confirm, since Buffers are deleted through base pointers.
class Buffer : public ir::Data
{
public:
  /**
   * @brief Return writable pointer for data area
   * @return Writable pointer
   */
  virtual uint8_t *baseWritable(void) const = 0;
};

/**
 * @brief Class for internally allocated data area
 *        (owns its storage; freed when the buffer is destroyed)
 */
class InternalBuffer final : public Buffer
{
public:
  // NOTE(review): single-argument constructor — consider `explicit`
  InternalBuffer(size_t size) : _base{nnfw::cpp14::make_unique<uint8_t[]>(size)}, _size{size}
  {
    // DO NOTHING
  }

public:
  size_t size(void) const override { return _size; }
  const uint8_t *base(void) const override { return _base.get(); }
  uint8_t *baseWritable(void) const override { return _base.get(); }

private:
  std::unique_ptr<uint8_t[]> _base; // Owned storage
  size_t _size;                     // Size in bytes
};

/**
 * @brief Class for data area from outside
 *        (non-owning view; caller must keep the memory alive)
 */
class ExternalBuffer final : public Buffer
{
public:
  ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size}
  {
    // DO NOTHING
  }

public:
  size_t size(void) const override { return _size; }
  const uint8_t *base(void) const override { return _base; }
  uint8_t *baseWritable(void) const override { return _base; }

private:
  uint8_t *_base; // Not owned
  size_t _size;   // Size in bytes
};
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file ExecEnv.h + * @brief This file contains ExecEnv to access interpreter tensor and execution status + */ +#ifndef __NEURUN_EXEC_INTERP_EXEC_ENV_H_ +#define __NEURUN_EXEC_INTERP_EXEC_ENV_H_ + +#include <unordered_set> + +#include "ir/Graph.h" +#include "Tensor.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class to gather interpreter execution environment + * Each interpreter instance own execution environment + */ +class ExecEnv +{ +public: + /** + * @brief Construct a new Exec Env object (deleted) + */ + ExecEnv(void) = delete; + /** + * @brief Construct a new ExecEnv object + * @param[in] graph Graph to execute by interpreter + */ + explicit ExecEnv(const ir::Graph &graph) : _graph(graph) + { + // DO NOTHING + } + +public: + /** + * @brief Return graph to execute + * @return Graph + */ + const ir::Graph &graph(void) const { return _graph; } + /** + * @brief Assign tensor to environment which have allocated or assigned buffer + * @param[in] index Tensor index + * @param[in] tensor Tensor + */ + void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor) + { + assert(tensor->bufferRO() != nullptr); + _tensors.emplace(index, tensor); + } + + /** + * @brief Return tensor pointer in environment + * @param[in] index Tensor index + * @return Tensor pointer + */ + const ITensor *tensorAt(const ir::OperandIndex index) 
const { return _tensors.at(index).get(); } + + /** + * @brief Check environment contains tensor + * @param[in] index Tensor index + * @return @c true if environment contain tensor, otherwise @c false + */ + bool contains(const ir::OperandIndex index) const + { + return (_tensors.find(index) != _tensors.end()); + } + + /** + * @brief Allocate tensor using operand info + * @param[in] index Tensor index + * @param[in] info Operand info + * @note If already allocated, just return + * @TODO More smart allocation policy + */ + void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info) + { + // already allocated, or constant + if (contains(index)) + { + return; + } + + auto tensor = std::make_shared<Tensor>(info); + tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size())); + assignTensor(index, tensor); + _buffers.insert(index); + } + + /** + * @brief Allocate read-only tensor and share data with other tensor + * @param[in] index Tensor index + * @param[in] info Operand info + * @param[in] index_to_share Tensor index that have data to share + */ + void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info, + const ir::OperandIndex index_to_share) + { + if (!contains(index_to_share)) + { + throw std::runtime_error{"Cannot find tensor to share data"}; + } + + // already allocated + if (contains(index)) + { + return; + } + else + { + auto tensor = std::make_shared<ROTensor>(info); + tensor->setData(tensorAt(index_to_share)->shareData()); + assignTensor(index, tensor); + _buffers.insert(index); + } + } + + /** + * @brief Free buffer if allocated by allocateIfNeed + * @param[in] index Tensor index + * @note If allocated by outside, just return + */ + void freeIfAllocated(const ir::OperandIndex index) + { + if (_buffers.find(index) != _buffers.end()) + { + _tensors.at(index)->releaseData(); + } + } + +private: + const ir::Graph &_graph; + // Tensor map to use in interpreter + // It should map tensors that have 
allocated or assigned buffer pointer + std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors; + // Tensors allocated by allocateIfNeed (buffer) + std::unordered_set<ir::OperandIndex> _buffers; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_EXEC_ENV_H_ diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.cc b/runtime/neurun/core/src/exec/interp/ExecManager.cc new file mode 100644 index 000000000..92f182c06 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/ExecManager.cc @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#include "ExecManager.h"
#include "ExecEnv.h"
#include "Interpreter.h"

#include "util/logging.h"

#include <cpp14/memory.h>

namespace neurun
{
namespace exec
{
namespace interp
{

// Run one whole-model inference on the interpreter.
// NOTE `desc` must carry a valid buffer for every model input and output before calling.
void ExecManager::execute(const IODescription &desc)
{
  /************************************************************************
   * Prepare execution model (submodel)
     It may execute divided model
     but now consider model inference is done at interpreter
   ***********************************************************************/
  ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;

  // Wrap each user-supplied input buffer as a read-only tensor (no copy)
  for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
  {
    ir::IOIndex index{n};
    const auto input_index = _graph.getInputs().at(index);
    const auto &input = *desc.inputs.at(n);

    auto input_tensor = std::make_shared<ROTensor>(input.info);
    input_tensor->setData(std::make_shared<const ir::ExternalData>(
        reinterpret_cast<const uint8_t *>(input.buffer), input.size));
    tensor_map[input_index] = input_tensor;
  }

  // Wrap each user-supplied output buffer as a writable tensor so kernels
  // write results directly into the caller's memory
  for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
  {
    ir::IOIndex index{n};
    const auto output_index = _graph.getOutputs().at(index);
    const auto &output = *desc.outputs.at(n);

    auto output_tensor = std::make_shared<Tensor>(output.info);
    output_tensor->setBuffer(
        std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output.buffer), output.size));
    tensor_map[output_index] = output_tensor;
  }

  /************************************************************************
   * Prepare execution environment
     Execution environment will be assigned to invoked interpreter instance
   ***********************************************************************/

  std::unique_ptr<ExecEnv> interp_env = nnfw::cpp14::make_unique<ExecEnv>(_graph);

  // Assign input tensor into interpreter execution environment
  for (auto index : _graph.getInputs())
  {
    if (tensor_map.find(index) != tensor_map.end())
    {
      VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl;
      interp_env->assignTensor(index, tensor_map.at(index));
    }
  }

  // Assign output tensor into interpreter execution environment
  for (auto index : _graph.getOutputs())
  {
    if (tensor_map.find(index) != tensor_map.end())
    {
      VERBOSE(INTERPRETER) << "Assign output tensor. operand index: " << index.value() << std::endl;
      interp_env->assignTensor(index, tensor_map.at(index));
    }
  }

  // Allocate constant tensor: constants reference the model's own data, read-only
  _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
    if (obj.isConstant())
    {
      VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
                           << std::endl;

      auto const_tensor = std::make_shared<ROTensor>(obj.info());
      // Assume that interpreter's tensor layout is same with model (NHWC)
      const_tensor->setData(
          std::make_shared<ir::ExternalData>(obj.data().base(), obj.info().total_size()));
      interp_env->assignTensor(ind, const_tensor);
    }
  });

  /*****************************************************************************
   * Invoke interpreter
   ****************************************************************************/

  Interpreter interp(std::move(interp_env));
  interp.run();

  /*****************************************************************************
   * Invoked interpreter run is finished
   ****************************************************************************/

  // If interpreter execute submodel
  // 1. Get tensor output of submodel into tensor_map to save result
  // 2. Generate new ExecEnv for next interpretation
}

} // namespace interp
} // namespace exec
} // namespace neurun
 */

/**
 * @file ExecManager.h
 * @brief This file contains ExecManager class\n
 *        to manage interpreter execution and environment
 */
#ifndef __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_
#define __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_

#include "ir/Graph.h"
#include "exec/IExecutor.h"
#include "Tensor.h"

namespace neurun
{
namespace exec
{
namespace interp
{

/**
 * @brief Class to execute model using interpreter
 */
class ExecManager final : public IExecutor
{
public:
  /**
   * @brief Construct a new ExecManager object
   * @param[in] graph Graph to execute; the reference must outlive this object
   */
  explicit ExecManager(const ir::Graph &graph) : _graph(graph)
  {
    // DO NOTHING
  }

public:
  /**
   * @brief Return graph object
   * @return Graph object
   */
  const ir::Graph &graph() final { return _graph; }
  // Rank feedback is not used by the interpreter backend
  void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
      // Not implemented
  };
  /**
   * @brief Start execution
   * @note It should be called after setting input and output buffer
   */
  void execute(const IODescription &desc) final;

private:
  const ir::Graph &_graph; // graph to interpret (not owned)
  ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
};

} // namespace interp
} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Interpreter.h"

#include <stack>
#include <unordered_set>

#include "Registration.h"

#include "ir/OperandIndexMap.h"
#include "util/logging.h"
#include "ir/OperationVisitor.h"

namespace neurun
{
namespace exec
{
namespace interp
{

// TODO more structured execution kernel implementation
// TODO use cker for execution
// TODO divide tensor prepare and execution
// TODO introduce memory manager (buffer allocate and free)
// Dispatches one operation to its registered kernel via the visitor pattern.
class OperationExecutor : ir::OperationVisitor
{
public:
  // Register one kernel per supported opcode; kernels are static singletons
  // owned by their translation units (see Registration.h), so raw pointers are fine.
  OperationExecutor(ExecEnv *env) : _env{env}
  {
    _kernels[ir::OpCode::Add] = getAdd();
    _kernels[ir::OpCode::Sub] = getSub();
    _kernels[ir::OpCode::Mul] = getMul();
    _kernels[ir::OpCode::Conv2D] = getConv2D();
    _kernels[ir::OpCode::MaxPool2D] = getMaxPool2D();
    _kernels[ir::OpCode::Concat] = getConcat();
    _kernels[ir::OpCode::Gather] = getGather();
    _kernels[ir::OpCode::AvgPool2D] = getAvgPool2D();
    _kernels[ir::OpCode::FullyConnected] = getFullyConnected();
    _kernels[ir::OpCode::InstanceNorm] = getInstanceNorm();
    _kernels[ir::OpCode::Softmax] = getSoftMax();
    _kernels[ir::OpCode::Reshape] = getReshape();
    _kernels[ir::OpCode::DepthwiseConv2D] = getDepthwiseConv();
    _kernels[ir::OpCode::TransposeConv] = getTransposeConv();
    _kernels[ir::OpCode::Logistic] = getLogistic();
    _kernels[ir::OpCode::Pad] = getPad();
    _kernels[ir::OpCode::ReLU] = getReLU();
    _kernels[ir::OpCode::ReLU1] = getReLU1();
    _kernels[ir::OpCode::ReLU6] = getReLU6();
    _kernels[ir::OpCode::Tanh] = getTanh();
  }

  // Run the kernel for the operation at `idx` (prepare output tensors, then invoke)
  void execute(const ir::OperationIndex &idx)
  {
    const auto nodeName = _env->graph().operations().at(idx).name();
    VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
                         << " operation (id: " << idx.value() << ")" << std::endl;
    _env->graph().operations().at(idx).accept(*this);
  }

private:
// Generate one visit() override per operation in Operations.lst:
// optional prepare (output allocation / shape inference), then invoke.
#define OP(InternalName)                                                  \
  void visit(const ir::operation::InternalName &node) override           \
  {                                                                       \
    if (_kernels[ir::OpCode::InternalName]->prepare != nullptr)          \
    {                                                                     \
      _kernels[ir::OpCode::InternalName]->prepare(_env, node);           \
    }                                                                     \
    _kernels[ir::OpCode::InternalName]->invoke(_env, node);              \
  }
#include "ir/Operations.lst"
#undef OP

private:
  ExecEnv *_env; // execution environment (not owned)
  std::unordered_map<ir::OpCode, OpKernel *> _kernels;
};

// Data-flow driven execution: an operation runs as soon as all its inputs are
// ready; readiness propagates from graph inputs and constants via use-def info.
void Interpreter::run()
{
  VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;

  // operand_stack: save operands prepared to use
  std::stack<ir::OperandIndex> operand_stack;

  // Note: We should push input first, then constant.
  // We use use-def for find operators ready to execution,
  // but Use-Def cannot handle parameters (maybe constant, but not always)
  // Note: If all model inputs are constant, it may not work (depend on tensors' order).
  // But that scenario may not exist
  for (auto ind : _env->graph().getInputs())
  {
    VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;

    operand_stack.push(ind);
  }

  _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
    if (obj.isConstant())
    {
      VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;

      operand_stack.push(ind);
    }
  });

  // Execution
  // ready_check: operands already marked available (each operand is processed once)
  // executed:    operations already run (used for buffer lifetime tracking below)
  std::unordered_set<ir::OperandIndex> ready_check;
  std::unordered_set<ir::OperationIndex> executed;
  OperationExecutor executor{_env.get()};
  while (!operand_stack.empty())
  {
    const auto current_operand_index = operand_stack.top();
    operand_stack.pop();
    VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
                         << " is checked ready to use" << std::endl;

    assert(ready_check.find(current_operand_index) == ready_check.end());
    ready_check.insert(current_operand_index);

    // Find prepared operations by scan use of current operand
    std::stack<ir::OperationIndex> operation_stack;
    auto use_operators = std::list<ir::OperationIndex>(
        _env->graph().operands().at(current_operand_index).getUses().list());
    // Remove operation index duplication
    // If one operation uses same operand tensor for multiple input,
    // use-list have duplicated operation index
    use_operators.unique();
    for (auto use_operator : use_operators)
    {
      // Assumption: all parameters are ready to use
      bool operator_ready = true;
      for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
      {
        if (ready_check.find(input_index) == ready_check.end())
        {
          operator_ready = false;
          break;
        }
      }

      if (operator_ready)
      {
        VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
        operation_stack.push(use_operator);
      }
    }

    while (!operation_stack.empty())
    {
      const auto current_operation_index = operation_stack.top();
      operation_stack.pop();
      VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
                           << _env->graph().operations().at(current_operation_index).name() << ")"
                           << std::endl;

      // execution
      // 1. Prepare output tensor
      // 2. Call operation kernel
      executor.execute(current_operation_index);
      executed.insert(current_operation_index);

      // 3. Push each output into operand stack
      const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs();
      for (auto def_operand : def_operands)
      {
        VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
                             << std::endl;
        operand_stack.push(def_operand);
      }

      // 4. Free if lifetime of buffer operands used by input is finished
      //    (an input buffer is dead once every operation that uses it has executed)
      for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs())
      {
        const auto use_operators = _env->graph().operands().at(input_index).getUses();
        bool dead_buffer = true;
        for (auto use_operator : use_operators.list())
        {
          if (executed.find(use_operator) == executed.end())
          {
            dead_buffer = false;
            break;
          }
        }

        if (dead_buffer)
        {
          _env->freeIfAllocated(input_index);
        }
      }
    }
  }
}

} // namespace interp
} // namespace exec
} // namespace neurun
+ */ + +/** + * @file Interpreter.h + * @brief This file contains Interpreter class for interpretation + */ +#ifndef __NEURUN_EXEC_INTERP_INTERPRETER_H__ +#define __NEURUN_EXEC_INTERP_INTERPRETER_H__ + +#include "ExecEnv.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Class for interpretation + */ +class Interpreter +{ + +public: + /** + * @brief Construct a new Interpreter object (deleted) + */ + Interpreter() = delete; + /** + * @brief Construct a new Interpreter object + * @param[in] env Execution environment variable for interpreter object + */ + Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)} + { + // DO NOTHING + } + +public: + /** + * @brief Run interpreter until there is no operation to execute + */ + void run(); + +private: + std::unique_ptr<ExecEnv> _env; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_INTERPRETER_H__ diff --git a/runtime/neurun/core/src/exec/interp/Registration.h b/runtime/neurun/core/src/exec/interp/Registration.h new file mode 100644 index 000000000..3ebe3bc9f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Registration.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

/**
 * @file Registration.h
 * @brief Kernel registry for the interpreter: one OpKernel per operation,
 *        each defined in the operations/ directory
 */
#ifndef __NEURUN_EXEC_INTERP_REGISTRATION_H__
#define __NEURUN_EXEC_INTERP_REGISTRATION_H__

#include "ExecEnv.h"

#include "ir/Operation.h"

namespace neurun
{
namespace exec
{
namespace interp
{

/**
 * @brief Kernel entry for one operation type
 */
struct OpKernel
{
  // Optional: allocate/shape-infer output tensors before execution (may be nullptr)
  std::function<void(ExecEnv *, const ir::Operation &)> prepare;
  // Required: run the operation using tensors in the (read-only) environment
  std::function<void(const ExecEnv *, const ir::Operation &)> invoke;
};

// Defined in operations/ directory
// Each getter returns a pointer to a function-local static kernel (never null,
// valid for the program's lifetime)
OpKernel *getAdd();
OpKernel *getSub();
OpKernel *getMul();
OpKernel *getConv2D();
OpKernel *getMaxPool2D();
OpKernel *getConcat();
OpKernel *getGather();
OpKernel *getAvgPool2D();
OpKernel *getFullyConnected();
OpKernel *getInstanceNorm();
OpKernel *getSoftMax();
OpKernel *getDepthwiseConv();
OpKernel *getReshape();
OpKernel *getTransposeConv();
OpKernel *getLogistic();
OpKernel *getPad();
OpKernel *getReLU();
OpKernel *getReLU1();
OpKernel *getReLU6();
OpKernel *getTanh();

} // namespace interp
} // namespace exec
} // namespace neurun

#endif // __NEURUN_EXEC_INTERP_REGISTRATION_H__
+ */ + +#include "Tensor.h" + +#define NO_USE(a) (void)(a) + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +void ITensor::access(const std::function<void(backend::operand::ITensor &tensor)> &fn) +{ + fn(*this); +} + +size_t ROTensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + NO_USE(coords); + throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); +} + +size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const +{ + NO_USE(coords); + throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); +} + +ir::Layout ROTensor::layout() const +{ + // TODO Changes to return frontend layout + return ir::Layout::NHWC; +} + +ir::Layout Tensor::layout() const +{ + // TODO Changes to return frontend layout + return ir::Layout::NHWC; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/Tensor.h b/runtime/neurun/core/src/exec/interp/Tensor.h new file mode 100644 index 000000000..c53fd46a6 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/Tensor.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file Tensor.h + * @brief This file contains ITensor interface, ROTensor class, and Tensor class + */ +#ifndef __NEURUN_EXEC_INTERP_TENSOR_H__ +#define __NEURUN_EXEC_INTERP_TENSOR_H__ + +#include "Buffer.h" + +#include "ir/OperandInfo.h" +#include "backend/operand/ITensor.h" +#include "ir/Layout.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +/** + * @brief Interface to handle Tensor in interpreter + */ +class ITensor : public backend::operand::ITensor +{ +public: + virtual ~ITensor() = default; + +public: + virtual uint8_t *buffer() const = 0; + /** + * @brief Return shared pointer for buffer + * @return Buffer shared pointer + */ + virtual std::shared_ptr<const Buffer> shareBuffer() const = 0; + /** + * @brief Return read-only buffer pointer + * @return Read-only buffer pointer + */ + virtual const uint8_t *bufferRO() const = 0; + /** + * @brief Return shared pointer for data + * @return Data shared pointer + */ + virtual std::shared_ptr<const ir::Data> shareData() const = 0; + /** + * @brief Set internal/external buffer + * @param[in] buffer Buffer pointer + */ + virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0; + /** + * @brief Set data reference (including constant, input) + * @param[in] data Data pointer + */ + virtual void setData(std::shared_ptr<const ir::Data> data) = 0; + virtual void releaseData() = 0; + + virtual size_t total_size() const = 0; + virtual size_t dimension(size_t index) const = 0; + virtual size_t num_dimensions() const = 0; + virtual size_t calcOffset(const util::Coordinates &coords) const = 0; + + virtual bool has_padding() const = 0; + /** + * @brief Return data type of tensor + * @return Data type of tensor + */ + virtual ir::DataType data_type() const = 0; + /** + * @brief Return TensorInfo + * @return TensorInfo + */ + virtual const ir::OperandInfo &tensorInfo() const = 0; + /** + * @brief Return number of elements + * @return Number of elements + */ + virtual uint64_t 
num_elements() const = 0; + void access(const std::function<void(backend::operand::ITensor &tensor)> &fn) final; +}; + +/** + * @brief Class to handle tensor in interpreter as read-only + */ +class ROTensor final : public ITensor +{ +public: + ROTensor() = delete; + ROTensor(const ir::OperandInfo &info) : _info(info) + { + // DO NOTHING + } + +public: + uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; } + std::shared_ptr<const Buffer> shareBuffer() const override + { + throw std::runtime_error{"Read only tensor"}; + } + const uint8_t *bufferRO() const override { return _data->base(); } + std::shared_ptr<const ir::Data> shareData() const override { return _data; } + void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; } + void setData(std::shared_ptr<const ir::Data> data) override { _data = data; } + void releaseData() override { _data = nullptr; } + + size_t total_size() const override { return _info.total_size(); } + size_t dimension(size_t index) const override { return _info.shape().dim(index); } + size_t num_dimensions() const override { return _info.shape().rank(); } + size_t calcOffset(const util::Coordinates &coords) const override; + ir::Layout layout() const override; + bool has_padding() const override { return false; } + ir::DataType data_type() const override { return _info.typeInfo().type(); } + const ir::OperandInfo &tensorInfo() const override { return _info; } + uint64_t num_elements() const override { return _info.shape().num_elements(); }; + +private: + const ir::OperandInfo _info; + std::shared_ptr<const ir::Data> _data{nullptr}; +}; + +/** + * @brief Class to handle tensor in interpreter as writable + */ +class Tensor final : public ITensor +{ +public: + Tensor() = delete; + Tensor(const ir::OperandInfo &info) : _info(info) + { + // DO NOTHING + } + +public: + uint8_t *buffer() const override { return _buffer->baseWritable(); } + std::shared_ptr<const Buffer> shareBuffer() const override 
{ return _buffer; }; + const uint8_t *bufferRO() const override { return _buffer->base(); } + std::shared_ptr<const ir::Data> shareData() const override { return _buffer; } + void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; } + void setData(std::shared_ptr<const ir::Data>) override + { + throw std::runtime_error{"Passed data may read-only"}; + } + void releaseData() override { _buffer = nullptr; } + + size_t total_size() const override { return _info.total_size(); } + size_t dimension(size_t index) const override { return _info.shape().dim(index); } + size_t num_dimensions() const override { return _info.shape().rank(); } + size_t calcOffset(const util::Coordinates &coords) const override; + ir::Layout layout() const override; + bool has_padding() const override { return false; } + ir::DataType data_type() const override { return _info.typeInfo().type(); } + const ir::OperandInfo &tensorInfo() const override { return _info; } + uint64_t num_elements() const override { return _info.shape().num_elements(); }; + +private: + const ir::OperandInfo _info; + std::shared_ptr<const Buffer> _buffer{nullptr}; +}; + +} // namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_TENSOR_H__ diff --git a/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc new file mode 100644 index 000000000..bd396491f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cker/operation/AveragePool.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "ir/operation/AvgPool2D.h"
#include "util/Utils.h"
#include "util/Padding.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace avgpool2d
{

// Allocate the output tensor (inferring its shape when the model left it
// unspecified) and sanity-check input/output ranks and types.
void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
{
  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  const auto in_tensor = env->tensorAt(in_index);
  UNUSED_RELEASE(in_tensor);

  assert(in_tensor->num_dimensions() == 4);

  const auto output_info = env->graph().operands().at(out_index).info();
  if (output_info.total_size() == 0)
  {
    // Handle unspecified output shape
    const auto &avgpool_node =
        nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
    const auto infered_output_shapes =
        shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
    env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
  }
  else
  {
    env->allocateIfNeeded(out_index, output_info);
  }

  auto out_tensor = env->tensorAt(out_index);
  UNUSED_RELEASE(out_tensor);

  // Handle same ifm & ofm data type only
  assert(in_tensor->data_type() == out_tensor->data_type());
  assert(out_tensor->num_dimensions() == 4);
}

// Run the float average-pool kernel (cker) on prepared tensors.
void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
            const ir::operation::AvgPool2D::Param &param)
{
  // TODO Support NCHW frontend
  const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
                                                      param.stride, param.kw, param.kh);
  // Calculate
  nnfw::cker::PoolParams cker_param;
  calculateActivationRange(param.activation, &cker_param.float_activation_min,
                           &cker_param.float_activation_max);
  cker_param.filter_width = param.kw;
  cker_param.filter_height = param.kh;
  cker_param.padding_values.width = padding.left;
  cker_param.padding_values.height = padding.top;
  cker_param.stride_width = param.stride.horizontal;
  cker_param.stride_height = param.stride.vertical;

  const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
  const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
  float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());

  nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
}

// Kernel entry: dispatch by input data type (only FLOAT32 is supported).
void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
{
  const auto &avgpool_node =
      nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);

  const auto in_index = node.getInputs().at(0);
  const auto out_index = node.getOutputs().at(0);

  // Fetch the input/output tensors prepared by prepareAvgPool2D()
  const auto in_tensor = env->tensorAt(in_index);
  const auto out_tensor = env->tensorAt(out_index);

  const auto data_type = in_tensor->data_type();
  if (data_type == ir::DataType::FLOAT32)
  {
    invoke(in_tensor, out_tensor, avgpool_node.param());
  }
  else
  {
    throw std::runtime_error{"NYI: Support float only"};
  }
}
} // namespace avgpool2d

// Return the singleton kernel entry for AvgPool2D
OpKernel *getAvgPool2D()
{
  static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
  return &kernel;
}

} // namespace interp
}
// namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc new file mode 100644 index 000000000..16469b9db --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/BinaryArithmeticOps.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Add.h" +#include "ir/operation/Sub.h" +#include "ir/operation/Mul.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +enum class OpType +{ + ADD, + SUB, + MUL +}; + +template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node) +{ + const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); + + const auto lhs_index = node.getInputs().at(add_node.LHS); + const auto rhs_index = node.getInputs().at(add_node.RHS); + const auto out_index = node.getOutputs().at(0); + + const auto lhs_tensor = env->tensorAt(lhs_index); + const auto rhs_tensor = env->tensorAt(rhs_index); + + // Check shape and type lhs is same with rhs + // TODO Util function to compare TensorInfo + if (lhs_tensor->data_type() != rhs_tensor->data_type()) + { + throw 
std::runtime_error{"Interp(Add): Different input types"}; + } + + bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()); + if (try_broadcast) + { + bool success = true; + auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(), + rhs_tensor->tensorInfo().shape(), success); + if (!success) + { + throw std::runtime_error{"Interp(Add): Fail to brodcasting"}; + } + + auto output_info = ir::OperandInfo(out_shape, lhs_tensor->tensorInfo().typeInfo()); + // We can handle already allocated (ex. model output) + env->allocateIfNeeded(out_index, output_info); + } + else + { + // Output's shape and type is same with input + auto output_info = lhs_tensor->tensorInfo(); + // We can handle already allocated (ex. model output) + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + // Check shape and type lhs is same with output + // TODO Util function to compare TensorInfo + if (lhs_tensor->data_type() != out_tensor->data_type()) + { + throw std::runtime_error{"Interp(Add): Invalid output type"}; + } +} + +inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params) +{ + params->float_activation_min = min; + params->float_activation_max = max; +} + +inline void setActivationParams(int32_t min, int32_t max, + nnfw::cker::BinaryArithmeticOpParam *params) +{ + params->quantized_activation_min = min; + params->quantized_activation_max = max; +} + +template <typename raw_type, typename param_type, OpType op_type> +void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor, + const param_type ¶m) +{ + const auto lhs_buffer = lhs_tensor->bufferRO(); + const auto rhs_buffer = rhs_tensor->bufferRO(); + auto out_buffer = out_tensor->buffer(); + + nnfw::cker::BinaryArithmeticOpParam cker_param; + raw_type activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + 
setActivationParams(activation_min, activation_max, &cker_param); + const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer); + const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer); + raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer); + + // Calculate + const std::function<raw_type(const raw_type &, const raw_type &)> fn_add = + [](const raw_type &a, const raw_type &b) { return a + b; }; + const std::function<raw_type(const raw_type &, const raw_type &)> fn_sub = + [](const raw_type &a, const raw_type &b) { return a - b; }; + const std::function<raw_type(const raw_type &, const raw_type &)> fn_mul = + [](const raw_type &a, const raw_type &b) { return a * b; }; + + const std::function<raw_type(const raw_type &, const raw_type &)> fn = + (op_type == OpType::ADD) ? fn_add : ((op_type == OpType::SUB) ? fn_sub : fn_mul); + + if (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()) + { + const auto lhs_shape = convertExtendShape(lhs_tensor->tensorInfo().shape()); + const auto rhs_shape = convertExtendShape(rhs_tensor->tensorInfo().shape()); + const auto out_shape = convertExtendShape(out_tensor->tensorInfo().shape()); + nnfw::cker::BroadcastBinaryArithmeticOpSlow(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, + out_shape, out_ptr, fn); + return; + } + + const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape()); + const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + nnfw::cker::BinaryArithmeticOp(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape, + out_ptr, fn); +} + +template <typename node_type, typename param_type, OpType op_type> +void invokeAdd(const ExecEnv *env, const ir::Operation &node) +{ + const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); + + const auto lhs_index = node.getInputs().at(arithmetic_node.LHS); + const auto rhs_index = 
node.getInputs().at(arithmetic_node.RHS); + const auto out_index = node.getOutputs().at(0); + const auto lhs_tensor = env->tensorAt(lhs_index); + const auto rhs_tensor = env->tensorAt(rhs_index); + const auto out_tensor = env->tensorAt(out_index); + const auto data_type = lhs_tensor->data_type(); + + if (data_type == ir::DataType::INT32) + { + invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, + arithmetic_node.param()); + } + else if (data_type == ir::DataType::FLOAT32) + { + invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param()); + } + else + { + throw std::runtime_error{"NYI: Unsupported data type"}; + } +} +} // namespace add + +OpKernel *getAdd() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Add>, + invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>}; + return &kernel; +} + +OpKernel *getSub() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Sub>, + invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>}; + return &kernel; +} + +OpKernel *getMul() +{ + static OpKernel kernel = {prepareAdd<ir::operation::Mul>, + invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Concat.cc b/runtime/neurun/core/src/exec/interp/operations/Concat.cc new file mode 100644 index 000000000..a127e5f30 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Concat.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/Concatenation.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Concat.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace concat +{ + +void prepareConcat(ExecEnv *env, const ir::Operation &node) +{ + const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node); + + const auto first_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto first_tensor = env->tensorAt(first_index); + uint32_t out_axis_dimension = 0; + const int32_t axis_raw = concat_node.param().axis; + const uint32_t axis = (axis_raw < 0) ? 
(axis_raw + first_tensor->num_dimensions()) : axis_raw; + + // All inputs shape should be same except axis dimension + // All inputs type should be same + for (auto input : node.getInputs()) + { + assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions()); + assert(first_tensor->data_type() == env->tensorAt(input)->data_type()); + for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++) + { + if (i == axis) + { + out_axis_dimension += env->tensorAt(input)->dimension(i); + continue; + } + assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i)); + } + } + + // Make output tensor info using first input tensor info, and accumulated axis dimension value + auto out_shape = first_tensor->tensorInfo().shape(); + out_shape.dim(axis) = out_axis_dimension; + env->allocateIfNeeded(out_index, + ir::OperandInfo{out_shape, first_tensor->tensorInfo().typeInfo()}); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Output shape should be same with input except axis dimension + // Output type should be same with input + assert(first_tensor->data_type() == out_tensor->data_type()); + for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++) + { + if (i == axis) + { + continue; + } + assert(first_tensor->dimension(i) == out_tensor->dimension(i)); + } +} + +void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis) +{ + const uint32_t count = in_tensors.size(); + + // Calculate + nnfw::cker::ConcatenationParams cker_param; + cker_param.axis = (int8_t)axis; + cker_param.inputs_count = count; + + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + + std::vector<nnfw::cker::Shape> in_shapes; + std::vector<const nnfw::cker::Shape *> in_shape_ptrs; + in_shapes.reserve(count); + in_shape_ptrs.reserve(count); + std::vector<const float *> in_ptrs; + for (uint32_t i = 0; i < count; i++) + { + 
in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape())); + in_shape_ptrs.push_back(&in_shapes[i]); + in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO())); + } + + auto out_buffer = out_tensor->buffer(); + float *out_ptr = reinterpret_cast<float *>(out_buffer); + + nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape, + out_ptr); +} + +void invokeConcat(const ExecEnv *env, const ir::Operation &node) +{ + const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node); + const int32_t axis_raw = concat_node.param().axis; + + std::vector<const ITensor *> in_tensors; + for (const auto &e : concat_node.getInputs()) + { + in_tensors.emplace_back(env->tensorAt(e)); + } + + const auto out_index = node.getOutputs().at(0); + const auto out_tensor = env->tensorAt(out_index); + const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw; + + const auto data_type = in_tensors[0]->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(in_tensors, out_tensor, axis); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} +} // namespace concat + +OpKernel *getConcat() +{ + static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc new file mode 100644 index 000000000..5242247a4 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cker/operation/Conv.h>

#include "OperationUtil.h"

#include "exec/interp/Registration.h"
#include "ir/operation/Conv2D.h"
#include "util/Utils.h"
#include "util/Padding.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"

namespace neurun
{
namespace exec
{
namespace interp
{
namespace conv2d
{

/**
 * @brief Validate inputs and allocate the output tensor for Conv2D
 *
 * Expects 4D input/kernel and 1D bias. If the model left the output shape
 * unspecified (total_size == 0), it is inferred from the input/kernel shapes
 * and the node parameters.
 */
void prepareConv2D(ExecEnv *env, const ir::Operation &node)
{
  const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
  const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
  const auto out_index = node.getOutputs().at(0);

  const auto in_tensor = env->tensorAt(in_index);
  const auto kernel_tensor = env->tensorAt(kernel_index);
  const auto bias_tensor = env->tensorAt(bias_index);

  assert(in_tensor->num_dimensions() == 4);
  assert(kernel_tensor->num_dimensions() == 4);
  assert(bias_tensor->num_dimensions() == 1);

  UNUSED_RELEASE(in_tensor);
  UNUSED_RELEASE(kernel_tensor);
  UNUSED_RELEASE(bias_tensor);

  const auto output_info = env->graph().operands().at(out_index).info();
  if (output_info.total_size() == 0)
  {
    // Handle unspecified output shape: infer from input/kernel shapes and params
    const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
    const auto infered_output_shapes = shape_inference::inferConv2DShape(
        in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
    env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
  }
  else
  {
    env->allocateIfNeeded(out_index, output_info);
  }

  auto out_tensor = env->tensorAt(out_index);
  UNUSED_RELEASE(out_tensor);

  // Handle same ifm & ofm data type only
  assert(in_tensor->data_type() == out_tensor->data_type());
  assert(out_tensor->num_dimensions() == 4);
}

/**
 * @brief Run a float32 2D convolution via the cker reference kernel
 *
 * Computes padding from the node's padding/stride parameters and the
 * NHWC-interpreted input/output feature shapes.
 */
void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
            const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
{
  // TODO Support NCHW frontend
  const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = ker_tensor->tensorInfo().shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
                                                      param.stride, ker_width, ker_height);

  // Fused-activation clamp range
  float activation_min, activation_max;
  calculateActivationRange(param.activation, &activation_min, &activation_max);

  nnfw::cker::ConvParams cker_param;
  cker_param.padding_values.width = padding.left;
  cker_param.padding_values.height = padding.top;
  cker_param.stride_width = param.stride.horizontal;
  cker_param.stride_height = param.stride.vertical;
  cker_param.dilation_width_factor = 1;
  cker_param.dilation_height_factor = 1;
  cker_param.float_activation_min = activation_min;
  cker_param.float_activation_max = activation_max;

  const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
  const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
  const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
  const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
  const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
  const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
  const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
  float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());

  nnfw::cker::Conv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
                   bias_ptr, cker_ofm_shape, ofm_ptr);
}

/**
 * @brief Execute Conv2D for @c node; only FLOAT32 is implemented
 *
 * @throws std::runtime_error for non-float input tensors
 */
void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
{
  const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);

  const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
  const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
  const auto ofm_index = node.getOutputs().at(0);

  const auto ifm_tensor = env->tensorAt(ifm_index);
  const auto ker_tensor = env->tensorAt(ker_index);
  const auto bias_tensor = env->tensorAt(bias_index);
  const auto ofm_tensor = env->tensorAt(ofm_index);

  const auto data_type = ifm_tensor->data_type();
  if (data_type == ir::DataType::FLOAT32)
  {
    invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
  }
  else
  {
    throw std::runtime_error{"NYI: Support float32 only"};
  }
}
} // namespace conv2d

// Registration entry: prepare/invoke function pair for Conv2D
OpKernel *getConv2D()
{
  static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
  return &kernel;
}

} // namespace interp
} // namespace exec
} // namespace neurun
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/DepthwiseConv.h> +#include <misc/polymorphic_downcast.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/DepthwiseConv2D.h" +#include "util/Padding.h" +#include "util/Utils.h" +#include "util/ShapeInference.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +namespace +{ + +void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT); + const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + assert(in_tensor->num_dimensions() == 4); + assert(kernel_tensor->num_dimensions() == 4); + assert(bias_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + // TODO handle unspecified output shape: + // calculate output shape using ifm shape, kernel shape, padding, stride + const auto output_info = env->graph().operands().at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output 
shape + const auto &depth_conv_node = + nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node); + const auto infered_output_shapes = shape_inference::inferDepthwiseConv2DShape( + in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), + depth_conv_node.param()); + env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param ¶m) +{ + // TODO Support NCHW frontend + const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
+ const auto &ker_shape = ker_tensor->tensorInfo().shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, ker_width, ker_height); + + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::DepthwiseConvParams cker_param; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.depth_multiplier = param.multiplier; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + cker_param.dilation_width_factor = 1; + cker_param.dilation_height_factor = 1; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); + const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); + const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO()); + float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); + + nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node) +{ + const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT); + const auto ker_index = 
node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); + const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} + +} // namespace + +OpKernel *getDepthwiseConv() +{ + static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc new file mode 100644 index 000000000..9c1c5d4e2 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/FullyConnected.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/FullyConnected.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace fc +{ + +void prepareFC(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT); + const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto kernel_tensor = env->tensorAt(kernel_index); + const auto bias_tensor = env->tensorAt(bias_index); + + UNUSED_RELEASE(in_tensor); + UNUSED_RELEASE(kernel_tensor); + UNUSED_RELEASE(bias_tensor); + + assert(in_tensor->num_dimensions() >= 2); + assert(kernel_tensor->num_dimensions() == 2); + assert(bias_tensor->num_dimensions() == 1); + + const auto input_size_with_batch = in_tensor->num_elements(); + const auto num_units = kernel_tensor->dimension(0); + const auto input_size = kernel_tensor->dimension(1); + const auto batch_size = input_size_with_batch / input_size; + assert(input_size_with_batch % input_size == 0); + assert(num_units == bias_tensor->dimension(0)); + + // Make output tensor info + ir::Shape output_shape(2); + output_shape.dim(0) = batch_size; + output_shape.dim(1) = num_units; + const ir::OperandInfo out_info{output_shape, in_tensor->tensorInfo().typeInfo()}; + env->allocateIfNeeded(out_index, out_info); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 2); + assert(out_tensor->dimension(0) == batch_size); + assert(out_tensor->dimension(1) == num_units); +} + +void invoke(const 
ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, + const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param ¶m) +{ + const auto ifm_buffer = ifm_tensor->bufferRO(); + const auto ker_buffer = ker_tensor->bufferRO(); + const auto bias_buffer = bias_tensor->bufferRO(); + auto ofm_buffer = ofm_tensor->buffer(); + + // Calculate + nnfw::cker::FullyConnectedParams cker_param; + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + const auto cker_ifm_shape = convertExtendShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertExtendShape(ker_tensor->tensorInfo().shape()); + const auto cker_bias_shape = convertExtendShape(bias_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertExtendShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer); + const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer); + const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer); + float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer); + + nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); +} + +void invokeFC(const ExecEnv *env, const ir::Operation &node) +{ + const auto &conv_node = + nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); + const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto bias_tensor = env->tensorAt(bias_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const auto 
data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float only"}; + } +} +} // namespace fc + +OpKernel *getFullyConnected() +{ + static OpKernel kernel = {fc::prepareFC, fc::invokeFC}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Gather.cc b/runtime/neurun/core/src/exec/interp/operations/Gather.cc new file mode 100644 index 000000000..8b64d1937 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Gather.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/Gather.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Gather.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareGather(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT); + const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto indices_tensor = env->tensorAt(indices_index); + + // TODO handle unspecified output shape: + // calculate output shape using ifm shape, kernel shape, padding, stride + const auto output_info = env->graph().operands().at(output_index).info(); + if (output_info.total_size() == 0) + { + throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"}; + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + if (indices_tensor->data_type() != ir::DataType::INT32) + { + throw std::runtime_error{"Interp(Gather): Invalid indices data type"}; + } + + auto output_tensor = env->tensorAt(output_index); + auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1; + + if (output_rank != output_tensor->num_dimensions()) + { + throw std::runtime_error{"Interp(Gather): Invalid output rank"}; + } + if (output_tensor->data_type() != input_tensor->data_type()) + { + throw std::runtime_error{"Interp(Gather): Invalid output data type"}; + } + + if (input_tensor->data_type() == ir::DataType::QUANT8_ASYMM && + input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo()) + { + throw std::runtime_error{ + "Interp(Gather): Cannot handle different I/O QUANT8_ASYMM scale/offset"}; + } +} + +template <typename raw_type> +void invoke(const ITensor *input_tensors, const ITensor *indices_tensors, + const ITensor 
*output_tensor, uint32_t axis) +{ + // Calculate + nnfw::cker::GatherParams cker_param; + cker_param.axis = (int8_t)axis; + + const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape()); + const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO()); + const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO()); + raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer()); + + nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape, + indices_ptr, cker_output_shape, output_ptr); +} + +void invokeGather(const ExecEnv *env, const ir::Operation &node) +{ + const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node); + const int32_t axis_raw = gather_node.param().axis; + + const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT); + const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto indices_tensor = env->tensorAt(indices_index); + const auto output_tensor = env->tensorAt(output_index); + const uint32_t axis = (axis_raw < 0) ? 
(axis_raw + input_tensor->num_dimensions()) : axis_raw; + + const auto data_type = input_tensor->data_type(); + + switch (data_type) + { + case ir::DataType::FLOAT32: + invoke<float>(input_tensor, indices_tensor, output_tensor, axis); + break; + case ir::DataType::INT32: + invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis); + break; + case ir::DataType::QUANT8_ASYMM: + invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis); + break; + default: + throw std::runtime_error{"Interp(Gather): NYI - Not supported type"}; + } +} + +} // namespace concat + +OpKernel *getGather() +{ + static OpKernel kernel = {prepareGather, invokeGather}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc new file mode 100644 index 000000000..d1623d53c --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/InstanceNorm.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/InstanceNorm.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace instancenorm +{ + +void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node) +{ + const auto &instancenorm_node = + nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node); + + const auto input_index = node.getInputs().at(instancenorm_node.INPUT); + const auto output_index = node.getOutputs().at(0); + const auto input_tensor = env->tensorAt(input_index); + + if (input_tensor->num_dimensions() != 4) + { + throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"}; + } + + // Output shape should be same with input + env->allocateIfNeeded(output_index, input_tensor->tensorInfo()); + + auto output_tensor = env->tensorAt(output_index); + UNUSED_RELEASE(output_tensor); + + // Handle same ifm & ofm data type only + assert(input_tensor->data_type() == output_tensor->data_type()); + assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape()); +} + +inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params) +{ + params->float_activation_min = min; + params->float_activation_max = max; +} + +void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor, + const ITensor *output_tensor, const ir::operation::InstanceNorm::Param ¶m) +{ + // Calculate + float activation_min, activation_max; + calculateActivationRange(param.activation, &activation_min, &activation_max); + + nnfw::cker::InstanceNormParams cker_param; + cker_param.epsilon = param.epsilon; + cker_param.float_activation_min = activation_min; + cker_param.float_activation_max = activation_max; + + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_gamma_shape = 
convertShape(gamma_tensor->tensorInfo().shape()); + const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO()); + const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO()); + const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO()); + float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer()); + + nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr, + cker_beta_shape, beta_ptr, cker_output_shape, output_ptr); +} + +void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node) +{ + const auto &instancenorm_node = + nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node); + + const auto input_index = node.getInputs().at(instancenorm_node.INPUT); + const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA); + const auto beta_index = node.getInputs().at(instancenorm_node.BETA); + const auto out_index = node.getOutputs().at(0); + const auto input_tensor = env->tensorAt(input_index); + const auto gamma_tensor = env->tensorAt(gamma_index); + const auto beta_tensor = env->tensorAt(beta_index); + const auto out_tensor = env->tensorAt(out_index); + const auto data_type = input_tensor->data_type(); + + if (data_type == ir::DataType::FLOAT32) + { + invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param()); + } + else + { + throw std::runtime_error{"NYI: Unsupported data type"}; + } +} +} // namespace instancenorm + +OpKernel *getInstanceNorm() +{ + static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Logistic.cc 
b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc new file mode 100644 index 000000000..2fc68ffd2 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/Logistic.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Logistic.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareLogistic(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + + const auto output_info = env->graph().operands().at(output_index).info(); + + // Check shape and type lhs is same with rhs + // TODO Util function to compare TensorInfo + if (output_info.total_size() == 0) + { + throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + const auto output_tensor = env->tensorAt(output_index); + if (input_tensor->data_type() != output_tensor->data_type()) + { + throw std::runtime_error{"Interp(Logistic): Invalid output type"}; + } +} + +void invoke(const ITensor *input_tensor, const ITensor *output_tensor) +{ + const auto input_buffer = 
input_tensor->bufferRO(); + auto output_buffer = output_tensor->buffer(); + + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const float *input_ptr = reinterpret_cast<const float *>(input_buffer); + float *output_ptr = reinterpret_cast<float *>(output_buffer); + + nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr); +} + +void invokeLogistic(const ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto output_tensor = env->tensorAt(output_index); + + const auto data_type = input_tensor->data_type(); + + if (data_type == ir::DataType::FLOAT32) + { + invoke(input_tensor, output_tensor); + } + else + { + throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"}; + } +} +} // namespace + +OpKernel *getLogistic() +{ + static OpKernel kernel = {prepareLogistic, invokeLogistic}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc new file mode 100644 index 000000000..3e1711d8e --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/MaxPool.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/MaxPool2D.h" +#include "util/Utils.h" +#include "util/Padding.h" +#include "util/ShapeInference.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + + assert(in_tensor->num_dimensions() == 4); + UNUSED_RELEASE(in_tensor); + + const auto output_info = env->graph().operands().at(out_index).info(); + if (output_info.total_size() == 0) + { + // Handle unspecified output shape + const auto &maxpool_node = + nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node); + const auto infered_output_shapes = + shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param()); + env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()}); + } + else + { + env->allocateIfNeeded(out_index, output_info); + } + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Handle same ifm & ofm data type only + assert(in_tensor->data_type() == out_tensor->data_type()); + assert(out_tensor->num_dimensions() == 4); +} + +void invoke(const ITensor *in_tensor, const ITensor *out_tensor, + const ir::operation::MaxPool2D::Param ¶m) +{ + // TODO support NCHW frontend + 
const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape, + param.stride, param.kw, param.kh); + // Calculate + nnfw::cker::PoolParams cker_param; + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + cker_param.filter_width = param.kw; + cker_param.filter_height = param.kh; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + + nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr); +} + +void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node) +{ + const auto &maxpool_node = + nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + const auto data_type = in_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(in_tensor, out_tensor, maxpool_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} +} // namespace + +OpKernel *getMaxPool2D() +{ + static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git 
a/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h new file mode 100644 index 000000000..5f4146bb8 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ +#define __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ + +#include "ir/Shape.h" +#include "ir/InternalType.h" + +#include <cker/Shape.h> + +namespace neurun +{ +namespace exec +{ +namespace interp +{ + +inline nnfw::cker::Shape convertShape(const ir::Shape &shape) +{ + auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); + + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = dimensions[i]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape) +{ + auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); + + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + uint32_t start = 4 - dimensions.size(); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i < start) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = dimensions[i - start]; 
+ } + } + + return nnfw::cker::GetShape(raw_shape); +} + +template <typename T> +void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + throw std::runtime_error{"Unsupported activation type"}; + } +} + +inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success) +{ + int lhs_rank = lhs.rank(); + int rhs_rank = rhs.rank(); + + int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank); + ir::Shape out_shape(out_rank); + + int lhs_idim = lhs_rank - 1; + int rhs_idim = rhs_rank - 1; + success = true; + for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--) + { + if (lhs_idim == -1 && rhs_idim == -1) + { + // invalid result + success = false; + break; + } + + if (lhs_idim == -1) + { + out_shape.dim(out_idim) = rhs.dim(rhs_idim); + rhs_idim--; + } + else if (rhs_idim == -1) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + } + else + { + if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim)) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + rhs_idim--; + } + else if (lhs.dim(lhs_idim) == 1) + { + out_shape.dim(out_idim) = rhs.dim(rhs_idim); + lhs_idim--; + rhs_idim--; + } + else if (rhs.dim(rhs_idim) == 1) + { + out_shape.dim(out_idim) = lhs.dim(lhs_idim); + lhs_idim--; + rhs_idim--; + } + else + { + // invalid result + success = false; + break; + } + } + } + + if (lhs_idim != -1 || rhs_idim != -1) + { + // invalid result + success = false; + } + return out_shape; +} + +} 
// namespace interp +} // namespace exec +} // namespace neurun + +#endif // __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_ diff --git a/runtime/neurun/core/src/exec/interp/operations/Pad.cc b/runtime/neurun/core/src/exec/interp/operations/Pad.cc new file mode 100644 index 000000000..0c8267a90 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Pad.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/Pad.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Pad.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void preparePad(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + + const auto output_info = env->graph().operands().at(output_index).info(); + + // Check shape and type lhs is same with rhs + // TODO Util function to compare TensorInfo + if (output_info.total_size() == 0) + { + throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"}; + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + const auto output_tensor = env->tensorAt(output_index); + if (input_tensor->data_type() != output_tensor->data_type()) + { + throw std::runtime_error{"Interp(Pad): Invalid output type"}; + } +} + +void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor) +{ + const auto input_buffer = input_tensor->bufferRO(); + const auto pad_buffer = pad_tensor->bufferRO(); + auto output_buffer = output_tensor->buffer(); + + int32_t pad_rank = pad_tensor->dimension(0); + + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + const float *input_ptr = reinterpret_cast<const float *>(input_buffer); + const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer); + float *output_ptr = reinterpret_cast<float *>(output_buffer); + + nnfw::cker::Pad(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape, output_ptr, + nullptr); +} + +void invokePad(const ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT); + const auto pad_index = 
node.getInputs().at(ir::operation::Pad::PAD); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + const auto pad_tensor = env->tensorAt(pad_index); + const auto output_tensor = env->tensorAt(output_index); + + const auto data_type = input_tensor->data_type(); + + if (data_type == ir::DataType::FLOAT32) + { + invoke(input_tensor, pad_tensor, output_tensor); + } + else + { + throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"}; + } +} +} // namespace + +OpKernel *getPad() +{ + static OpKernel kernel = {preparePad, invokePad}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/Reshape.cc b/runtime/neurun/core/src/exec/interp/operations/Reshape.cc new file mode 100644 index 000000000..a160232de --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/Reshape.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "exec/interp/Registration.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepare(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + // Unspecified shape is not supported in operation node spec now + const auto output_info = env->graph().operands().at(out_index).info(); + env->allocateAndShareIfNeeded(out_index, output_info, in_index); + + assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size()); +} + +void invoke(const ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO()) + { + // Same data + return; + } + + const auto output_info = env->graph().operands().at(out_index).info(); + memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(), + output_info.total_size()); +} + +} // namespace {anonymous} + +OpKernel *getReshape() +{ + static OpKernel kernel = {prepare, invoke}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc b/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc new file mode 100644 index 000000000..91d98889f --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cker/operation/SoftMax.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/Softmax.h" +#include "misc/polymorphic_downcast.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta, + float *out) +{ + assert(input_size > 0); + + // For each batch + for (int b = 0; b < batch_size; b++) + { + // Find the max coeff. + float max_coeff = in[0]; + for (int i = 1; i < input_size; i++) + { + if (in[i] > max_coeff) + max_coeff = in[i]; + } + + // Compute the normalized sum of exps. + float exp_sum = 0.0; + for (int i = 0; i < input_size; i++) + { + out[i] = std::exp((in[i] - max_coeff) * beta); + exp_sum += out[i]; + } + + // Divide by the sum of exps. + float reciprocal_sum_exp = 1.f / exp_sum; + for (int i = 0; i < input_size; i++) + { + out[i] *= reciprocal_sum_exp; + } + + // Advance in and out pointers for the next batch. 
+ in += input_size; + out += input_size; + } +} + +void prepareSoftMax(ExecEnv *env, const ir::Operation &node) +{ + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + UNUSED_RELEASE(in_tensor); + + assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2)); + + // Output shape should be same with input + // Output type is pre-defined in model + const auto output_shape = env->graph().operands().at(in_index).info().shape(); + const auto output_type = env->graph().operands().at(out_index).info().typeInfo(); + + const ir::OperandInfo output_info{output_shape, output_type}; + env->allocateIfNeeded(out_index, output_info); + + auto out_tensor = env->tensorAt(out_index); + UNUSED_RELEASE(out_tensor); + + // Check output shape is same with input + assert(out_tensor->num_dimensions() == out_tensor->num_dimensions()); + for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++) + { + assert(in_tensor->dimension(i) == out_tensor->dimension(i)); + } +} + +void invoke(const ITensor *in_tensor, const ITensor *out_tensor, + const ir::operation::Softmax::Param ¶m) +{ + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + + float beta = param.beta; + + if (in_tensor->num_dimensions() == 2) + { + uint32_t batch_size = in_tensor->dimension(0); + uint32_t input_size = in_tensor->dimension(1); + + Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr); + } + else if (in_tensor->num_dimensions() == 4) + { + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + + nnfw::cker::SoftmaxParams cker_param; + cker_param.beta = beta; + + nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr); + } + else + { + throw std::runtime_error{"Unsuported input dimension: support 2D or 
4D"}; + } +} + +void invokeSoftMax(const ExecEnv *env, const ir::Operation &node) +{ + const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + const auto in_data_type = in_tensor->data_type(); + const auto out_data_type = out_tensor->data_type(); + if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32)) + { + invoke(in_tensor, out_tensor, softmax_node.param()); + } + else + { + throw std::runtime_error{"NYI: Support float32 only"}; + } +} + +} // namespace + +OpKernel *getSoftMax() +{ + static OpKernel kernel = {prepareSoftMax, invokeSoftMax}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc new file mode 100644 index 000000000..70b72c88d --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cker/operation/TransposeConv.h> +#include <misc/polymorphic_downcast.h> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" +#include "ir/operation/TransposeConv.h" +#include "util/Padding.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +void prepareTransposeConv(ExecEnv *env, const ir::Operation &node) +{ + const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index); + + assert(ifm_tensor->num_dimensions() == 4); + assert(ker_tensor->num_dimensions() == 4); + assert(ofm_shape_tensor->num_dimensions() == 1); + + UNUSED_RELEASE(ifm_tensor); + UNUSED_RELEASE(ker_tensor); + UNUSED_RELEASE(ofm_shape_tensor); + + const auto output_info = env->graph().operands().at(ofm_index).info(); + if (output_info.total_size() == 0) + { + // TODO: Handle unspecified output shape + throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; + } + else + { + env->allocateIfNeeded(ofm_index, output_info); + } + + auto ofm_tensor = env->tensorAt(ofm_index); + UNUSED_RELEASE(ofm_tensor); + + // Handle same ifm & ofm data type only + if (ifm_tensor->data_type() != ofm_tensor->data_type()) + { + throw std::runtime_error{"Interp(TConv): Different I/O data dype"}; + } + + if (ofm_tensor->num_dimensions() != 4) + { + throw std::runtime_error{"Interp(TConv): Invalid output rank"}; + } +} + +void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor, + const ir::operation::TransposeConv::Param ¶m) +{ + const auto ifm_shape = 
ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto ker_shape = ker_tensor->tensorInfo().shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto padding = neurun::util::calculatePadding(param.padding, ofm_shape, ifm_shape, + param.stride, ker_width, ker_height); + + nnfw::cker::TransposeConvParams cker_param; + cker_param.padding_values.width = padding.left; + cker_param.padding_values.height = padding.top; + cker_param.stride_width = param.stride.horizontal; + cker_param.stride_height = param.stride.vertical; + cker_param.dilation_width_factor = 1; + cker_param.dilation_height_factor = 1; + + const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); + const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); + const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); + const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); + const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); + float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); + + nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, + cker_ofm_shape, ofm_ptr); +} + +void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node) +{ + const auto &tconv_node = + nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node); + + const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT); + const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); + const auto ofm_index = node.getOutputs().at(0); + + const auto ifm_tensor = env->tensorAt(ifm_index); + const auto ker_tensor = env->tensorAt(ker_index); + const auto ofm_tensor = env->tensorAt(ofm_index); + + const 
auto data_type = ifm_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param()); + } + else + { + throw std::runtime_error{"Interp(TConv): Support float32 only"}; + } +} + +} // namespace transposeconv + +OpKernel *getTransposeConv() +{ + static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc new file mode 100644 index 000000000..116806fc4 --- /dev/null +++ b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cmath> + +#include "OperationUtil.h" + +#include "exec/interp/Registration.h" + +#include "ir/operation/ReLU.h" +#include "ir/operation/ReLU1.h" +#include "ir/operation/ReLU6.h" +#include "ir/operation/Tanh.h" + +namespace neurun +{ +namespace exec +{ +namespace interp +{ +namespace +{ + +enum class ActivationType +{ + ReLU, + ReLU1, + ReLU6, + Tanh +}; + +void prepare(ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + const auto input_tensor = env->tensorAt(input_index); + + const auto output_info = env->graph().operands().at(output_index).info(); + if (output_info.total_size() == 0) + { + // Output's shape and type is same with input + auto input_info = input_tensor->tensorInfo(); + // We can handle already allocated (ex. model output) + env->allocateIfNeeded(output_index, input_info); + } + else + { + env->allocateIfNeeded(output_index, output_info); + } + + const auto output_tensor = env->tensorAt(output_index); + // Check shape and type lhs is same with output + // TODO Util function to compare TensorInfo + if (input_tensor->data_type() != output_tensor->data_type()) + { + throw std::runtime_error{"Interp(Activations): Invalid output type"}; + } +} + +template <ActivationType act_type> +void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements) +{ + std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); }; + switch (act_type) + { + case ActivationType::ReLU: + fn = [](const float &in) { return std::max(0.f, in); }; + break; + case ActivationType::ReLU1: + fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); }; + break; + case ActivationType::ReLU6: + fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); }; + break; + case ActivationType::Tanh: + fn = [](const float &in) { return std::tanh(in); }; + break; + default: + throw std::runtime_error{"Interp(Activations): 
NYI - Unsupported activation"}; + break; + } + + const float *input_end = input_ptr + num_elements; + for (; input_ptr < input_end; input_ptr++, output_ptr++) + { + *output_ptr = fn(*input_ptr); + } +} + +template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node) +{ + const auto input_index = node.getInputs().at(0); + const auto output_index = node.getOutputs().at(0); + + // Check lhs shape is same with rhs (with broadcast) + const auto input_tensor = env->tensorAt(input_index); + const auto output_tensor = env->tensorAt(output_index); + + const auto data_type = input_tensor->data_type(); + if (data_type == ir::DataType::FLOAT32) + { + uint64_t elements = input_tensor->num_elements(); + const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO()); + float *out = reinterpret_cast<float *>(output_tensor->buffer()); + + evalFloat<act_type>(input_start, out, elements); + } + else + { + throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"}; + } +} + +} // namespace + +OpKernel *getReLU() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>}; + return &kernel; +} + +OpKernel *getReLU1() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>}; + return &kernel; +} + +OpKernel *getReLU6() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>}; + return &kernel; +} + +OpKernel *getTanh() +{ + static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>}; + return &kernel; +} + +} // namespace interp +} // namespace exec +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/Graph.cc b/runtime/neurun/core/src/ir/Graph.cc new file mode 100644 index 000000000..a84ebb68b --- /dev/null +++ b/runtime/neurun/core/src/ir/Graph.cc @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Graph.h" + +#include <algorithm> +#include <bitset> +#include <sstream> + +#include "util/logging.h" +#include "verifier/Verifier.h" +#include "cpp14/memory.h" +#include "ir/operation/LowerInfo.h" +#include "ir/operand/LowerInfo.h" +#include "ir/operand/PermuteFactor.h" +#include "ir/GraphIterator.h" +#include "operand/Shape4DConvert.h" +#include "compiler/BackendResolver.h" +#include "backend/IConfig.h" +#include "pass/ConstantInsertionPass.h" +#include "pass/PermutationInsertionPass.h" +#include "pass/PermutationEliminationPass.h" +#include "pass/PermutationOperationPass.h" + +namespace neurun +{ +namespace ir +{ + +Graph::Graph() = default; + +Graph::~Graph(void) = default; + +OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type) +{ + return _operands.emplace(shape, type); +} + +OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&node) +{ + assert(isBuildingPhase()); + return _operations.push(std::move(node)); +} + +void Graph::setOperandValue(const OperandIndex &ind, std::unique_ptr<Data> &&data) +{ + assert(isBuildingPhase()); + assert(_operands.exist(ind)); + _operands.at(ind).data(std::move(data)); +} + +void Graph::addInput(const OperandIndex &ind) +{ + assert(isBuildingPhase()); + _inputs.append(ind); +} + +void Graph::addOutput(const OperandIndex &ind) +{ + assert(isBuildingPhase()); + _outputs.append(ind); +} + +void 
Graph::finishBuilding(void) +{ + assert(isBuildingPhase()); + _phase = Phase::MODEL; + + // Initialize operand use-def + initializeUseDef(); + + // Call graph verifications for the MODEL phase + { + assert(verifier::DAGChecker().verify(*this)); + assert(verifier::EdgeConsistencyChecker().verify(*this)); + } +} + +void Graph::lower(void) +{ + assert(_phase == Phase::MODEL); + + _op_seqs = nnfw::cpp14::make_unique<Subgraphs>(); + + // Lower + { + // operand::LowerInfo holder + OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info; + + _operands.iterate([&](const OperandIndex &index, const Operand &object) { + operands_lower_info[index] = + nnfw::cpp14::make_unique<operand::LowerInfo>(operand::asShape4D(object.shape())); + }); + + _lower_info_map = nnfw::cpp14::make_unique<LowerInfoMap>(); + + // Make subgraphs while checking whether a node can be merged into a op_seq. + makeSubgraphs(operands_lower_info); + + _op_seqs->iterate([&](const SubgraphIndex &, OpSequence &subg) { + assert(subg.operations().size() > 0); + std::reverse(std::begin(subg.operations()), std::end(subg.operations())); + }); + + _op_seqs->dump("merged and sorted operations without permutation"); + + pass::ConstantInsertionPass ci_pass(*this); + ci_pass.run(); + + // Set LowerInfo for each operand from the operand::LowerInfo holder + manipulateLowerInfo(operands_lower_info); + + dumpLowerInfo(); + } + + // Run Permutation Passes + { + pass::PermutationOperationPass po_pass(*this); + po_pass.run(); + + pass::PermutationInsertionPass pi_pass(*this); + pi_pass.run(); + // Implemented code no longer works. 
+ // pass::PermutationEliminationPass pe_pass(*this); + // pe_pass.run(); + + // TODO merge perm subgraphs if possible + _op_seqs->dump("merged and sorted operations with permutation"); + } + + // Graph verifications for the LOWERED phase + { + assert(verifier::DAGChecker().verify(*this)); + assert(verifier::EdgeConsistencyChecker().verify(*this)); + } +} + +void Graph::initializeUseDef() +{ + operations().iterate([&](const OperationIndex &index, const Operation &node) -> void { + auto outputs = node.getOutputs(); + for (auto output : outputs) + { + operands().at(output).appendDef(index); + } + + auto inputs = node.getInputs(); + for (auto input : inputs) + { + operands().at(input).appendUse(index); + } + }); +} + +const operation::LowerInfo *Graph::getLowerInfo(const SubgraphIndex &subg_index) const +{ + if (!_lower_info_map) + return nullptr; + auto itr = _lower_info_map->operation.find(subg_index); + if (itr == _lower_info_map->operation.end()) + return nullptr; + return itr->second.get(); +} + +void Graph::setLowerInfo(const SubgraphIndex &subg_index, + std::unique_ptr<operation::LowerInfo> &&lower_info) +{ + assert(_lower_info_map); + _lower_info_map->operation.insert(std::make_pair(subg_index, std::move(lower_info))); +} + +void Graph::removeLowerInfo(const SubgraphIndex &subg_index) +{ + auto &subg_lower_info = _lower_info_map->operation; + assert(subg_lower_info.find(subg_index) != subg_lower_info.end()); + for (auto it = subg_lower_info.begin(); it != subg_lower_info.end(); ++it) + { + if (it->first == subg_index) + { + subg_lower_info.erase(it); + break; + } + } +} + +const operand::LowerInfo *Graph::getLowerInfo(const OperandIndex &index) const +{ + if (!_lower_info_map) + return nullptr; + auto itr = _lower_info_map->operand.find(index); + if (itr == _lower_info_map->operand.end()) + return nullptr; + return itr->second.get(); +} + +operand::LowerInfo *Graph::getLowerInfo(const OperandIndex &index) +{ + if (!_lower_info_map) + return nullptr; + auto itr 
void Graph::setLowerInfo(const OperandIndex &index,
                         std::unique_ptr<operand::LowerInfo> &&lower_info)
{
  assert(_lower_info_map);
  _lower_info_map->operand.insert(std::make_pair(index, std::move(lower_info)));
}

// Drop an operand's LowerInfo entry (no-op if the index is absent).
void Graph::removeLowerInfo(const OperandIndex &index) { _lower_info_map->operand.erase(index); }

// Walk the graph in post-DFS order and group operations into op sequences
// (subgraphs). A node is appended to the current subgraph when the backend,
// layout and connectivity allow it (see mergeable()); otherwise a fresh
// single-op subgraph is started. Also records each operand's use/def
// permute factors in operands_lower_info as a side effect.
void Graph::makeSubgraphs(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info)
{
  // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
  const int subg_max_node = util::getConfigInt(util::config::SUBG_MAX_NODE);
  assert(subg_max_node >= 0);

  bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
  OpSequence *subg = nullptr;
  SubgraphIndex subg_index;

  // NOTE: The below method appends nodes while making one op_seq if needed. If something better
  // ways, happy to update this code.
  PostDfsConstIterator{}.iterate(*this, [&](const OperationIndex &node_index,
                                            const Operation &node) {
    // LowerInfo for in/output operands
    auto backend = _backend_resolver->getBackend(node_index);

    // TODO How to get frontend layout of this node from IR
    auto frontend_layout = Layout::NHWC;
    auto backend_layout = frontend_layout;

    // The layout of each backend should be set at another place
    // TODO Change setting layout of each backend at another place
    // TODO Remove getting id of backend
    if (backend->config()->id() == "acl_cl" || backend->config()->id() == "acl_neon")
    {
      // ACL backends honor the ACL_LAYOUT config override
      const std::string acl_layout_str = util::getConfigString(util::config::ACL_LAYOUT);
      if (acl_layout_str == "NHWC")
      {
        backend_layout = Layout::NHWC;
      }
      else if (acl_layout_str == "NCHW")
      {
        backend_layout = Layout::NCHW;
      }
    }
    else if (backend->config()->id() == "srcn")
    {
      // srcn backend honors the NCNN_LAYOUT config override
      const std::string ncnn_layout_str = util::getConfigString(util::config::NCNN_LAYOUT);
      if (ncnn_layout_str == "NHWC")
      {
        backend_layout = Layout::NHWC;
      }
      else if (ncnn_layout_str == "NCHW")
      {
        backend_layout = Layout::NCHW;
      }
    }
    else if (backend->config()->id() == "cpu")
    {
      backend_layout = Layout::NHWC;
    }

    // Record this node's backend/layout choice on every operand it touches
    for (auto operand : node.getInputs())
    {
      auto &&lower_info = operands_lower_info.at(operand);
      lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
    }
    for (auto operand : node.getOutputs())
    {
      auto &&lower_info = operands_lower_info.at(operand);
      lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
    }

    // Start a new subgraph when none exists yet or the size cap is reached
    bool new_subg =
        (subg == nullptr ||
         (subg_max_node != 0 && subg->operations().size() >= static_cast<size_t>(subg_max_node)));

    // for profiling each op_seq must contain just one node,
    // so that we can measure a node separately
    if (new_subg || is_profiling || !mergeable(subg_index, node_index, backend_layout))
    {
      auto new_subg_index = appendFreshSingleOpSubgraph(node_index, node, frontend_layout);

      // OpSequence LowerInfo
      setLowerInfo(new_subg_index,
                   nnfw::cpp14::make_unique<operation::LowerInfo>(backend, backend_layout));

      subg_index = new_subg_index;
      subg = &(_op_seqs->at(new_subg_index));

      VERBOSE(Lower) << "SUBG#" << subg_index.value() << " is created for "
                     << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
    }
    else
    {
      subg->appendOperation(node_index, node);
      subg->setInputs(node.getInputs());

      VERBOSE(Lower) << "SUBG#" << subg_index.value() << " merges "
                     << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
    }
  });
}

// Finalize operand LowerInfo: graph inputs get a def factor picked from their
// uses, constant graph outputs get a default-backend def factor, and all
// per-operand LowerInfo is moved into the graph's lower-info map.
void Graph::manipulateLowerInfo(
    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info)
{
  const auto default_backend = backend::BackendManager::get().getDefault();
  for (auto index : _inputs)
  {
    // Pick just any one from the uses, here the first one is chosen
    // For the other uses, Permute operations will be inserted later
    auto &&lower_info = operands_lower_info.at(index);
    assert(lower_info->use_factors().size() > 0);
    lower_info->addDefPermuteFactor(*lower_info->use_factors().begin());
  }
  for (auto index : _outputs)
  {
    auto &&lower_info = operands_lower_info.at(index);
    if (_operands.at(index).isConstant())
    {
      lower_info->addDefPermuteFactor(operand::PermuteFactor{
          default_backend,
          Layout::NHWC // TODO Get frontend layout of this node from IR
      });
    }
  }

  // Set LowerInfo for each operand from the operand::LowerInfo holder
  _operands.iterate([&](const OperandIndex &index, Operand &) {
    setLowerInfo(index, std::move(operands_lower_info[index]));
  });
}

// Debug helper: VERBOSE-log every operand's shape, use/def operations and
// backend/layout factors, ordered by operand index. No-op unless logging
// is enabled.
void Graph::dumpLowerInfo()
{
  if (::neurun::util::logging::ctx.enabled() == false)
    return;

  std::map<uint32_t, std::string> dumps;

  _operands.iterate([&](const OperandIndex &index, Operand &object) {
    std::stringstream sstream;
    if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
    {
      // Render a PermuteFactor set as "{ backend(layout) ... }"
      auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
        std::string str;
        for (auto factor : factors)
        {
          str += factor.backend()->config()->id();
          str += "(" + to_string(factor.layout()) + ")";
          str += " ";
        }
        return "{ " + str + "}";
      };

      // Render an operation index list as "{ 0 1 2 }"
      auto operation_index_to_string = [](const OperationIndexList &operations) {
        std::string str;
        for (auto op : operations.list())
        {
          str += std::to_string(op.value());
          str += " ";
        }
        return "{ " + str + "}";
      };

      const auto lower_info = getLowerInfo(index);
      const auto &shape = object.shape();
      const auto &lower_shape = lower_info->shape();
      std::string def_ops = operation_index_to_string(object.getDef());
      std::string use_ops = operation_index_to_string(object.getUses());
      std::string def_layouts = factors_to_string(lower_info->def_factors());
      std::string use_layouts = factors_to_string(lower_info->use_factors());
      sstream << "Operand #" << index.value() << " LowerInfo" << std::endl;
      sstream << "  - Shape           : { " << (shape.rank() > 0 ? shape.dim(0) : 0) << " "
              << (shape.rank() > 1 ? shape.dim(1) : 0) << " "
              << (shape.rank() > 2 ? shape.dim(2) : 0) << " "
              << (shape.rank() > 3 ? shape.dim(3) : 0) << " "
              << "}" << std::endl;
      sstream << "  - Def Operations  : " << def_ops << std::endl;
      sstream << "  - Use Operations  : " << use_ops << std::endl;
      sstream << "  - Lower Info" << std::endl;
      sstream << "    - 4D Shape (NHWC) : { " << lower_shape.n() << " " << lower_shape.h() << " "
              << lower_shape.w() << " " << lower_shape.c() << " "
              << "}" << std::endl;
      sstream << "    - Def Backends    : " << def_layouts << std::endl;
      sstream << "    - Use Backends    : " << use_layouts << std::endl;
    }
    dumps.emplace(index.value(), sstream.str());
  });

  for (const auto &e : dumps)
  {
    if (!e.second.empty())
    {
      VERBOSE(Lower) << e.second;
    }
  }
}
// Decide whether operation `node_index` may join op sequence `subg_index`.
// Requires: (1) identical backend id and layout, (2) no branching at the
// join point (single producer/consumer), and (3) a direct data edge between
// the node and the subgraph's first or last operation.
bool Graph::mergeable(const SubgraphIndex &subg_index, const OperationIndex &node_index,
                      Layout layout)
{
  // Are they mergeable?
  // 1. the same backend id and layout?
  // 2. Is op_seq or node branched?
  // 3. if 1 is true, the subg and a node are connected?
  const auto &subg = _op_seqs->at(subg_index);
  const auto &node = _operations.at(node_index);

  // The same backend id and layout?
  {
    const auto subg_backend_layout = getLowerInfo(subg_index)->layout();
    const auto &subg_backend_id = getLowerInfo(subg_index)->backend()->config()->id();
    const auto &node_backend_id = _backend_resolver->getBackend(node_index)->config()->id();
    VERBOSE(Lower) << "SUBG#" << subg_index.value() << " { " << subg_backend_id << "("
                   << to_string(subg_backend_layout) << ") }  "
                   << " NODE#" << node_index.value() << " (" << node.name() << ") { "
                   << node_backend_id << "(" << to_string(layout) << ") } " << std::endl;
    if (subg_backend_id != node_backend_id || subg_backend_layout != layout)
      return false;
  }

  // Branched?
  {
    std::unordered_set<OperationIndex> branched_set;

    // Check for branching up
    // The subgraph's inputs must come from at most one producer, otherwise
    // merging would hide a fan-in point.
    const auto &inputs = subg.getInputs();
    for (const auto &input : inputs)
    {
      const auto &input_obj = _operands.at(input);
      for (const auto &def : input_obj.getDef().list())
      {
        branched_set.insert(def);
        if (branched_set.size() > 1)
        {
          return false;
        }
      }
    }
    branched_set.clear();

    // Check for branching down
    // The node's outputs must feed at most one consumer, for the same reason.
    const auto &outputs = node.getOutputs();
    for (const auto &output : outputs)
    {
      const auto &output_obj = _operands.at(output);
      for (const auto &use : output_obj.getUses().list())
      {
        branched_set.insert(use);
        if (branched_set.size() > 1)
        {
          return false;
        }
      }
    }
  }

  // Connected?
  // an input of one node is an output of the other node? or vice-versa?
  {
    const auto &node_inputs = node.getInputs();
    const auto &node_outputs = node.getOutputs();

    // subg's operations are in order so that we just check the first and the last
    std::vector<Element> subg_ops{subg.operations()[0]};
    if (subg.operations().size() > 1)
      subg_ops.emplace_back(subg.operations()[subg.operations().size() - 1]);

    for (const auto &elem : subg_ops)
    {
      const auto &n_index = elem.index;
      const auto &n = *elem.node;

      // node's output == subg's input?
      const auto &n_inputs = n.getInputs();
      for (auto input : n_inputs)
      {
        if (node_outputs.contains(input))
        {
          VERBOSE(Lower) << "SUBG#" << subg_index.value() << " 's NODE#" << n_index.value() << "("
                         << n.name() << ") is connected to NODE#" << node_index.value() << "("
                         << node.name() << ")" << std::endl;
          return true;
        }
      }

      // node's input == subg's output?
      const auto &n_outputs = n.getOutputs();
      for (auto output : n_outputs)
      {
        if (node_inputs.contains(output))
        {
          VERBOSE(Lower) << "SUBG#" << subg_index.value() << " 's NODE#" << n_index.value() << " ("
                         << n.name() << ") is connected to NODE#" << node_index.value()
                         << std::endl;
          return true;
        }
      }
    }

    VERBOSE(Lower) << "SUBG#" << subg_index.value() << " is not connected to NODE#"
                   << node_index.value() << "(" << node.name() << ")" << std::endl;
  }

  return false;
}

// Create a new op sequence containing exactly `node`, and register it.
// Returns the new subgraph's index.
SubgraphIndex Graph::appendFreshSingleOpSubgraph(const OperationIndex &node_index,
                                                const Operation &node, Layout layout)
{
  // Create a fresh op_seq with one operation, and append it to subgraphs
  // Create a fresh op_seq
  auto subg = nnfw::cpp14::make_unique<OpSequence>(layout);

  // Add an operation
  subg->appendOperation(node_index, node);

  // Update input/output
  subg->setOutputs(node.getOutputs());
  subg->setInputs(node.getInputs());

  return _op_seqs->emplace(std::move(subg));
}

// Take ownership of the backend resolver used during lowering.
void Graph::setBackendResolver(std::unique_ptr<compiler::BackendResolver> &&br)
{
  _backend_resolver = std::move(br);
}

} // namespace ir
} // namespace neurun
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GraphIterator.h" + +#include "ir/OperationIndexMap.h" +#include "ir/Graph.h" + +namespace neurun +{ +namespace ir +{ + +// Explicit instantiations to have implementation in the source file. + +template class DefaultIterator<true>; +template class DefaultIterator<false>; + +template class PostDfsIterator<true>; +template class PostDfsIterator<false>; + +// +// Graph::DefaultIterator +// + +template <bool is_const> +void DefaultIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const +{ + graph.operations().iterate( + [&](const OperationIndex &index, NodeRef node) -> void { fn(index, node); }); +} + +// +// Graph::PostDfsIterator +// + +template <bool is_const> +void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const +{ + assert(!graph.isBuildingPhase()); // Restrict iteration condition + + OperationIndexMap<bool> visited; + graph.operations().iterate([&](const OperationIndex &index, NodeRef) { visited[index] = false; }); + + std::function<void(const OperationIndex &, NodeRef)> dfs_recursive = + [&](const OperationIndex &index, NodeRef node) -> void { + if (visited[index]) + return; + visited[index] = true; + + for (auto output : node.getOutputs()) + { + const auto &operand = graph.operands().at(output); + for (const auto &use : operand.getUses().list()) + { + dfs_recursive(use, graph.operations().at(use)); + } + } + + fn(index, node); + }; + + graph.operations().iterate(dfs_recursive); + + // All of the operations(nodes) must have been visited. 
+ assert(std::all_of(visited.begin(), visited.end(), + [](const std::pair<const OperationIndex, bool> &v) { return v.second; })); +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/GraphIterator.h b/runtime/neurun/core/src/ir/GraphIterator.h new file mode 100644 index 000000000..a5bf1c323 --- /dev/null +++ b/runtime/neurun/core/src/ir/GraphIterator.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_IR_GRAPH_ITERATOR_H__ +#define __NEURUN_IR_GRAPH_ITERATOR_H__ + +#include <type_traits> + +#include "ir/Index.h" + +namespace neurun +{ +namespace ir +{ + +class Graph; +class Operation; + +template <bool is_const> class Iterator +{ +public: + using GraphRef = typename std::conditional<is_const, const Graph &, Graph &>::type; + using IndexRef = const OperationIndex &; + using NodeRef = typename std::conditional<is_const, const Operation &, Operation &>::type; + using IterFn = std::function<void(IndexRef, NodeRef)>; + +public: + virtual ~Iterator() = default; + virtual void iterate(GraphRef graph, const IterFn &fn) const = 0; +}; + +template <bool is_const = false> class DefaultIterator final : public Iterator<is_const> +{ +public: + using GraphRef = typename Iterator<is_const>::GraphRef; + using IndexRef = typename Iterator<is_const>::IndexRef; + using NodeRef = typename Iterator<is_const>::NodeRef; + using IterFn = typename Iterator<is_const>::IterFn; + +public: + void iterate(GraphRef graph, const IterFn &fn) const; +}; +using DefaultConstIterator = DefaultIterator<true>; + +template <bool is_const = false> class PostDfsIterator final : public Iterator<is_const> +{ +public: + using GraphRef = typename Iterator<is_const>::GraphRef; + using IndexRef = typename Iterator<is_const>::IndexRef; + using NodeRef = typename Iterator<is_const>::NodeRef; + using IterFn = typename Iterator<is_const>::IterFn; + +public: + void iterate(GraphRef graph, const IterFn &fn) const; +}; +using PostDfsConstIterator = PostDfsIterator<true>; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_GRAPH_ITERATOR_H__ diff --git a/runtime/neurun/core/src/ir/LayoutSet.cc b/runtime/neurun/core/src/ir/LayoutSet.cc new file mode 100644 index 000000000..025ba45dc --- /dev/null +++ b/runtime/neurun/core/src/ir/LayoutSet.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "LayoutSet.h" + +namespace neurun +{ +namespace ir +{ + +LayoutSet::LayoutSet(std::initializer_list<Layout> layouts) +{ + for (auto layout : layouts) + { + _set.insert(layout); + } +} + +LayoutSet LayoutSet::operator|(const LayoutSet &other) const +{ + auto ret = *this; + for (auto layout : other) + { + ret.add(layout); + } + return ret; +} + +LayoutSet LayoutSet::operator&(const LayoutSet &other) const +{ + LayoutSet ret; + for (auto layout : other) + { + if (contains(layout)) + { + ret.add(layout); + } + } + return ret; +} + +LayoutSet LayoutSet::operator-(const LayoutSet &other) const +{ + auto ret = *this; + for (auto layout : other) + { + ret.remove(layout); + } + return ret; +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/LayoutSet.h b/runtime/neurun/core/src/ir/LayoutSet.h new file mode 100644 index 000000000..e38ef3ce2 --- /dev/null +++ b/runtime/neurun/core/src/ir/LayoutSet.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_IR_LAYOUT_SET_H__ +#define __NEURUN_IR_LAYOUT_SET_H__ + +#include <initializer_list> +#include <unordered_set> + +#include "ir/Layout.h" + +namespace neurun +{ +namespace ir +{ + +class LayoutSet +{ +public: + LayoutSet() = default; + LayoutSet(std::initializer_list<Layout> layouts); + +public: + void add(const Layout &layout) { _set.insert(layout); } + void remove(const Layout &layout) { _set.erase(layout); } + uint32_t size() const { return static_cast<uint32_t>(_set.size()); } + bool contains(const Layout &layout) const { return _set.find(layout) != _set.end(); } + +public: + LayoutSet operator|(const LayoutSet &other) const; // Union + LayoutSet operator&(const LayoutSet &other) const; // Intersect + LayoutSet operator-(const LayoutSet &other) const; // Minus + +public: + std::unordered_set<Layout>::const_iterator begin() const { return _set.begin(); } + std::unordered_set<Layout>::const_iterator end() const { return _set.end(); } + +private: + std::unordered_set<Layout> _set; +}; + +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_IR_LAYOUT_SET_H__ diff --git a/runtime/neurun/core/src/ir/OpCode.cc b/runtime/neurun/core/src/ir/OpCode.cc new file mode 100644 index 000000000..e6552a275 --- /dev/null +++ b/runtime/neurun/core/src/ir/OpCode.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/OpCode.h" + +#include <unordered_map> + +namespace neurun +{ +namespace ir +{ + +const char *toString(OpCode opcode) +{ + static const std::unordered_map<OpCode, const char *> map{{OpCode::Invalid, "Invalid"}, +#define OP(Name) {OpCode::Name, #Name}, +#include "ir/Operations.lst" +#undef OP + {OpCode::COUNT, "COUNT"}}; + return map.at(opcode); +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/OpSequence.cc b/runtime/neurun/core/src/ir/OpSequence.cc new file mode 100644 index 000000000..13a6cbe27 --- /dev/null +++ b/runtime/neurun/core/src/ir/OpSequence.cc @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/OpSequence.h" +#include "ir/OperationVisitor.h" +#include <sstream> + +namespace neurun +{ +namespace ir +{ + +OpSequence::OpSequence(Layout layout) : _layout{layout} +{ + // DO NOTHING +} + +void OpSequence::accept(OperationVisitor &v) const { v.visit(*this); } + +// TODO: Impl Dumper instead of this method +std::string OpSequence::getStr() const +{ + // " OpSequence IN(xx,xx,xx) -> { op0, op1, op2 } -> OUT(yy,yy,yy)" + std::stringstream ss; + ss << " OpSequence IN("; + for (const auto &index : getInputs()) + { + ss << " " << index.value(); + } + ss << " ) -> {"; + for (const auto &elem : _operations) + { + ss << " " << elem.index.value() << "(" << elem.node->name() << ")"; + } + ss << " } -> OUT("; + for (const auto &index : getOutputs()) + { + ss << " " << index.value(); + } + ss << " )"; + return ss.str(); +} + +void OpSequence::remove(const OperationIndex &index) +{ + assert(exist(index)); + for (auto it = _operations.cbegin(); it != _operations.cend(); ++it) + { + if (it->index == index) + { + _operations.erase(it); + break; + } + } +} + +bool OpSequence::exist(const OperationIndex &index) const +{ + for (const auto &element : _operations) + { + if (element.index == index) + { + return true; + } + } + return false; +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/Operand.cc b/runtime/neurun/core/src/ir/Operand.cc new file mode 100644 index 000000000..335dd17b9 --- /dev/null +++ b/runtime/neurun/core/src/ir/Operand.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Operand.h" + +namespace neurun +{ +namespace ir +{ + +size_t Operand::operandSize(void) const +{ + const uint32_t ranks = shape().rank(); + int32_t elements = 1; + + for (uint32_t rank = 0; rank < ranks; rank++) + { + elements *= shape().dim(rank); + } + + DataType type = typeInfo().type(); + size_t element_size = sizeOfDataType(type); + + // Value of type is matched with OperandCode enum in NeuralNetworks.h + return element_size * elements; +} + +void Operand::appendUse(const OperationIndex &idx) { _uses.append(idx); } + +void Operand::removeUse(const OperationIndex &idx) { _uses.remove(idx); } + +void Operand::appendDef(const OperationIndex &idx) +{ + assert(!isConstant()); + assert(_def.size() == 0); + + _def.append(idx); +} + +void Operand::removeDef(const OperationIndex &idx) +{ + assert(_def.contains(idx)); + + _def.remove(idx); +} + +void Operand::parent_info(std::unique_ptr<operand::ParentInfo> &&parent_info) +{ + _parent_info = std::move(parent_info); +} + +const operand::ParentInfo *Operand::parent_info() const { return _parent_info.get(); } + +operand::ParentInfo *Operand::parent_info() { return _parent_info.get(); } + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/OperandIndexSequence.cc b/runtime/neurun/core/src/ir/OperandIndexSequence.cc new file mode 100644 index 000000000..302444125 --- /dev/null +++ b/runtime/neurun/core/src/ir/OperandIndexSequence.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/OperandIndexSequence.h" + +#include <algorithm> + +namespace neurun +{ +namespace ir +{ + +OperandIndexSequence::OperandIndexSequence(std::initializer_list<OperandIndex> list) : _set(list) +{ + // DO NOTHING +} + +OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list) +{ + for (auto val : list) + { + _set.emplace_back(static_cast<uint32_t>(val)); + } +} + +OperandIndexSequence::OperandIndexSequence(std::initializer_list<uint32_t> list) +{ + for (auto val : list) + { + _set.emplace_back(val); + } +} + +bool OperandIndexSequence::contains(const OperandIndex &index) const +{ + return std::find(_set.begin(), _set.end(), index) != _set.end(); +} + +void OperandIndexSequence::replace(const OperandIndex &from, const OperandIndex &to) +{ + std::replace(_set.begin(), _set.end(), from, to); +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/Operation.cc b/runtime/neurun/core/src/ir/Operation.cc new file mode 100644 index 000000000..3e4b606f2 --- /dev/null +++ b/runtime/neurun/core/src/ir/Operation.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Operation.h" + +#include <cassert> + +namespace neurun +{ +namespace ir +{ + +Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs) + : _input_constr{input_constr}, _inputs{inputs}, _outputs{outputs} +{ +} + +Operation::Operation(OperandConstraint input_constr) : _input_constr{input_constr} {} + +Operation::~Operation() = default; + +void Operation::setInputs(const OperandIndexSequence &indexes) +{ + assert(_input_constr.check(indexes.size())); + _inputs = indexes; +} + +void Operation::setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; } + +void Operation::replaceInput(const OperandIndex &from, const OperandIndex &to) +{ + _inputs.replace(from, to); +} + +void Operation::replaceOutput(const OperandIndex &from, const OperandIndex &to) +{ + _outputs.replace(from, to); +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/OperationIndexList.cc b/runtime/neurun/core/src/ir/OperationIndexList.cc new file mode 100644 index 000000000..261cc5ce6 --- /dev/null +++ b/runtime/neurun/core/src/ir/OperationIndexList.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/OperationIndexList.h" + +#include <algorithm> + +namespace neurun +{ +namespace ir +{ + +OperationIndexList::OperationIndexList(std::initializer_list<OperationIndex> list) : _list(list) +{ + // DO NOTHING +} + +bool OperationIndexList::contains(const OperationIndex &index) const +{ + return std::find(_list.begin(), _list.end(), index) != _list.end(); +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/Shape.cc b/runtime/neurun/core/src/ir/Shape.cc new file mode 100644 index 000000000..2679f83c6 --- /dev/null +++ b/runtime/neurun/core/src/ir/Shape.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/Shape.h" +#include "util/Utils.h" + +#include <cassert> +#include <functional> +#include <numeric> + +namespace neurun +{ +namespace ir +{ + +FeatureShape Shape::asFeature(Layout layout) const +{ + assert(rank() == 4); + + if (layout == Layout::NHWC) + { + // Feature Map in NHWC layout + // - Dimension(0) -> Batch + // - Dimension(1) -> Height + // - Dimension(2) -> Width + // - Dimension(3) -> Depth + const auto batch = dim(0); + const auto depth = dim(3); + const auto height = dim(1); + const auto width = dim(2); + + return {batch, depth, height, width}; + } + else if (layout == Layout::NCHW) + { + // Feature Map in NHWC layout + // - Dimension(0) -> Batch + // - Dimension(1) -> Depth + // - Dimension(2) -> Height + // - Dimension(3) -> Width + const auto batch = dim(0); + const auto depth = dim(1); + const auto height = dim(2); + const auto width = dim(3); + + return {batch, depth, height, width}; + } + else + { + throw std::runtime_error("Wrong Layout"); + } +} + +// Extended dimension is filled with 1. +void Shape::extendRank(int to_rank) +{ + assert(to_rank - rank() >= 0); + _dimensions.insert(_dimensions.cbegin(), to_rank - rank(), 1); +} + +uint64_t Shape::num_elements() const +{ + // All of the nodes must have non-negative dimension + assert(std::all_of(_dimensions.begin(), _dimensions.end(), + [](const int32_t &v) { return (v >= 0); })); + + return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1), + std::multiplies<uint64_t>()); +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/Subgraphs.cc b/runtime/neurun/core/src/ir/Subgraphs.cc new file mode 100644 index 000000000..780fc8c28 --- /dev/null +++ b/runtime/neurun/core/src/ir/Subgraphs.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Subgraphs.h" +#include "util/logging.h" +#include "cpp14/memory.h" + +#include <cassert> +#include <string> + +namespace neurun +{ +namespace ir +{ + +SubgraphIndex Subgraphs::emplace(const OperationIndex &index, const Operation &node, Layout layout) +{ + std::unique_ptr<OpSequence> subg = nnfw::cpp14::make_unique<OpSequence>(layout); + subg->appendOperation(index, node); + return push(std::move(subg)); +} + +SubgraphIndex Subgraphs::emplace(std::unique_ptr<OpSequence> &&subg) +{ + return push(std::move(subg)); +} + +bool Subgraphs::containsOperation(const OperationIndex &operation_index) const +{ + return findOperation(operation_index).valid(); +} + +SubgraphIndex Subgraphs::getOperation(const OperationIndex &operation_index) const +{ + SubgraphIndex ret = findOperation(operation_index); + assert(ret.valid()); + return ret; +} + +// TODO: Extract this into external helper function +void Subgraphs::dump(const std::string &msg) const +{ + VERBOSE(Subgraphs) << "Subgraphs(" << msg << ")" << std::endl; + iterate([&](const SubgraphIndex &idx, const OpSequence &subg) { + VERBOSE(Subgraphs) << idx.value() << "] " << subg.getStr() << std::endl; + }); +} + +void Subgraphs::removeFromSubgraph(const OperationIndex &operation_index) +{ + const auto subg_index = findOperation(operation_index); + auto &subg = at(subg_index); + subg.remove(operation_index); + if (subg.size() == 0) + { + remove(subg_index); + } +} + +SubgraphIndex Subgraphs::findOperation(const OperationIndex &operation_index) const +{ + SubgraphIndex ret; + iterate([&](const 
SubgraphIndex &index, const OpSequence &object) { + for (const auto &elem : object.operations()) + { + if (elem.index == operation_index) + ret = index; + } + }); + return ret; +} + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/TypeInfo.cc b/runtime/neurun/core/src/ir/TypeInfo.cc new file mode 100644 index 000000000..280146b51 --- /dev/null +++ b/runtime/neurun/core/src/ir/TypeInfo.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/TypeInfo.h" + +namespace neurun +{ +namespace ir +{ + +bool operator==(const TypeInfo &lhs, const TypeInfo &rhs) +{ + if (lhs.type() != rhs.type()) + { + return false; + } + + if (lhs.offset() != rhs.offset()) + { + return false; + } + + if (lhs.scale() != rhs.scale()) + { + return false; + } + + return true; +} + +bool operator!=(const TypeInfo &lhs, const TypeInfo &rhs) { return !(lhs == rhs); } + +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/dumper/Dumper.cc b/runtime/neurun/core/src/ir/dumper/Dumper.cc new file mode 100644 index 000000000..ddfd1a47a --- /dev/null +++ b/runtime/neurun/core/src/ir/dumper/Dumper.cc @@ -0,0 +1,633 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dumper.h" + +#include <string> + +#include "util/logging.h" + +namespace neurun +{ +namespace ir +{ +namespace dumper +{ + +using namespace operation; + +void Dumper::visit(const Abs &node) +{ + VERBOSE(LIR) << "* Abs" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Add &node) +{ + VERBOSE(LIR) << "* Add" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Add::Input::LHS).value() << ", " + << node.getInputs().at(Add::Input::RHS).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const ArgMax &node) +{ + VERBOSE(LIR) << "* ArgMax" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const AvgPool2D &node) +{ + VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << 
std::endl; +} + +void Dumper::visit(const Cast &node) +{ + VERBOSE(LIR) << "* Cast" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Comparison &node) +{ + VERBOSE(LIR) << "* Comparison" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0).value() + << ", " << node.getInputs().at(Comparison::Input::INPUT1).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Concat &node) +{ + VERBOSE(LIR) << "* Concat" << std::endl; + std::string inputs; + for (auto i : node.getInputs()) + { + inputs += std::to_string(i.value()) + ","; + } + VERBOSE(LIR) << " - Inputs : IFM(" << inputs << ")" << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Conv2D &node) +{ + std::string padding_type = + node.param().padding.type == PaddingType::EXPLICIT ? 
"Explicit" : "Implicit"; + VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT).value() + << ") Kernel(" << node.getInputs().at(Conv2D::Input::KERNEL).value() << ") Bias(" + << node.getInputs().at(Conv2D::Input::BIAS).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const DepthToSpace &node) +{ + VERBOSE(LIR) << "* DepthToSpace" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const DepthwiseConv2D &node) +{ + std::string padding_type = + node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; + VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT).value() + << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL).value() + << ") Bias(" << node.getInputs().at(DepthwiseConv2D::Input::BIAS).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Dequantize &node) +{ + VERBOSE(LIR) << "* Dequantize" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Div &node) +{ + VERBOSE(LIR) << "* Div" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Div::Input::LHS).value() << ", " + << node.getInputs().at(Div::Input::RHS).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << 
node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const EmbeddingLookup &node) +{ + VERBOSE(LIR) << "* EmbeddingLookup" << std::endl; + VERBOSE(LIR) << " - Inputs : Lookups(" + << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS).value() << ") VALUES(" + << node.getInputs().at(EmbeddingLookup::Input::VALUES).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Exp &node) +{ + VERBOSE(LIR) << "* Exp" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Floor &node) +{ + VERBOSE(LIR) << "* Floor" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const FullyConnected &node) +{ + VERBOSE(LIR) << "* FullyConnected" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT).value() + << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT).value() + << ") Bias(" << node.getInputs().at(FullyConnected::Input::BIAS).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Gather &node) +{ + VERBOSE(LIR) << "* Gather" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT).value() + << ") Indices(" << node.getInputs().at(Gather::Input::INDICES).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const HashtableLookup &node) +{ + VERBOSE(LIR) << "* 
HashTableLookup" << std::endl; + VERBOSE(LIR) << " - Inputs : Lookups(" + << node.getInputs().at(HashtableLookup::Input::LOOKUPS).value() << ") Keys(" + << node.getInputs().at(HashtableLookup::Input::KEYS).value() << ") Values(" + << node.getInputs().at(HashtableLookup::Input::VALUES).value() << ")" << std::endl; + VERBOSE(LIR) << " - Outputs : Output(" + << node.getInputs().at(HashtableLookup::Output::OUTPUT).value() << ") Hits(" + << node.getInputs().at(HashtableLookup::Output::HITS).value() << ")" << std::endl; +} + +void Dumper::visit(const InstanceNorm &node) +{ + VERBOSE(LIR) << "* InstanceNorm" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT).value() + << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA).value() << ") Beta(" + << node.getInputs().at(InstanceNorm::Input::BETA).value() << ") Epsilon(" + << node.param().epsilon << ")" << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const L2Normalization &node) +{ + VERBOSE(LIR) << "* L2Normalization" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" + << node.getInputs().at(L2Normalization::Input::INPUT).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const L2Pool2D &node) +{ + VERBOSE(LIR) << "* L2Pool2D" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const LocalResponseNormalization &node) +{ + VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" + << node.getInputs().at(LocalResponseNormalization::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << 
node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const LSTM &node) +{ + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT).value() + << ") Input To Input Weights(" + << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS).value() + << ") Input To Forget Weights(" + << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS).value() + << ") Input To Cell Weights(" + << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS).value() + << ") Input To Output Weights(" + << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS).value() + << ") Recurrent To Input Weights(" + << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS).value() + << ") Recurrent To Forget Weights(" + << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS).value() + << ") Recurrent To Cell Weights(" + << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS).value() + << ") Recurrent To Output Weights(" + << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS).value() + << ") Cell To Input Weights(" + << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS).value() + << ") Cell To Forget Weights(" + << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS).value() + << ") Cell To OUTPUT Weights(" + << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS).value() + << ") Input Gate Bias(" << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS).value() + << ") Forget Gate Bias(" + << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS).value() << ") Cell Bias(" + << node.getInputs().at(LSTM::Input::CELL_BIAS).value() << ") Output Gate Bias(" + << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS).value() + << ") Projection Weights(" + << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS).value() + << ") Projection Bias(" << node.getInputs().at(LSTM::Input::PROJECTION_BIAS).value() + << ") Output State In(" << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN).value() + << ") Cell State In(" << 
node.getInputs().at(LSTM::Input::CELL_STATE_IN).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Scratch Buffer(" + << node.getOutputs().at(LSTM::Output::SCRATCH_BUFFER).value() + << ") Output State Out(" + << node.getInputs().at(LSTM::Output::OUTPUT_STATE_OUT).value() << ") Cell State Out(" + << node.getInputs().at(LSTM::Output::CELL_STATE_OUT).value() << ") Output(" + << node.getInputs().at(LSTM::Output::OUTPUT).value() << ")" << std::endl; +} + +void Dumper::visit(const LogicalAnd &node) +{ + VERBOSE(LIR) << "* LogicalAnd" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0).value() + << ", " << node.getInputs().at(LogicalAnd::Input::INPUT1).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const LogicalNot &node) +{ + VERBOSE(LIR) << "* LogicalNot" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const LogicalOr &node) +{ + VERBOSE(LIR) << "* LogicalOr" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0).value() + << ", " << node.getInputs().at(LogicalOr::Input::INPUT1).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Logistic &node) +{ + VERBOSE(LIR) << "* Logistic" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const MaxPool2D &node) +{ + std::string padding_type = + node.param().padding.type == PaddingType::EXPLICIT ? 
"Explicit" : "Implicit"; + VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Mean &node) +{ + VERBOSE(LIR) << "* Mean" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mean::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Mul &node) +{ + VERBOSE(LIR) << "* Mul" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS).value() << ", " + << node.getInputs().at(Mul::Input::RHS).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Neg &node) +{ + VERBOSE(LIR) << "* Neg" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Pack &node) +{ + VERBOSE(LIR) << "* Pack" << std::endl; + std::string inputs; + const auto &input_indices = node.getInputs(); + for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) + { + inputs += std::to_string(it->value()); + if (std::next(it) != std::end(input_indices)) + inputs += ", "; + } + VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Permute &node) +{ + std::string permute_type = "Unknown"; + switch (node.getPermuteType()) + { + case Permute::Type::COPY: + permute_type = "Copy"; + break; + case Permute::Type::NHWC_TO_NCHW: + permute_type 
= "NHWC to NCHW"; + break; + case Permute::Type::NCHW_TO_NHWC: + permute_type = "NCHW to NHWC"; + break; + } + + VERBOSE(LIR) << "* Permute(" + permute_type + ")" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const PReLU &node) +{ + VERBOSE(LIR) << "* PReLU" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT).value() + << ") Alpha(" << node.getInputs().at(PReLU::Input::ALPHA).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const ReduceMax &node) +{ + VERBOSE(LIR) << "* ReduceMax" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceMax::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const ReduceMin &node) +{ + VERBOSE(LIR) << "* ReduceMin" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceMin::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const ReduceSum &node) +{ + VERBOSE(LIR) << "* ReduceSum" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceSum::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const ReLU &node) +{ + VERBOSE(LIR) << "* ReLU" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const ReLU1 
&node) +{ + VERBOSE(LIR) << "* ReLU1" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const ReLU6 &node) +{ + VERBOSE(LIR) << "* ReLU6" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Reshape &node) +{ + VERBOSE(LIR) << "* Reshape" << std::endl; + // TODO The shape index should be "node.getInputs().at(1).value()" but not valid for now + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT).value() + << ") Shape(" + << "?" + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const ResizeBilinear &node) +{ + VERBOSE(LIR) << "* ResizeBilinear" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const RNN &node) +{ + VERBOSE(LIR) << "* RNN" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RNN::Input::INPUT).value() + << ") Weights" << node.getInputs().at(RNN::Input::WEIGHTS).value() + << ") Recurrent Weights" + << node.getInputs().at(RNN::Input::RECURRENT_WEIGHTS).value() << ") Bias" + << node.getInputs().at(RNN::Input::BIAS).value() << ") Hidden State" + << node.getInputs().at(RNN::Input::HIDDEN_STATE_IN).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(RNN::Output::OUTPUT).value() + << ") Hidden State" << node.getInputs().at(RNN::Output::HIDDEN_STATE_OUT).value() + << ")" << 
std::endl; +} + +void Dumper::visit(const RSQRT &node) +{ + VERBOSE(LIR) << "* RSQRT" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Softmax &node) +{ + VERBOSE(LIR) << "* Softmax" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const SpaceToDepth &node) +{ + VERBOSE(LIR) << "* SpaceToDepth" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Split &node) +{ + VERBOSE(LIR) << "* Split" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const SQRT &node) +{ + VERBOSE(LIR) << "* SQRT" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const SquaredDifference &node) +{ + VERBOSE(LIR) << "* SquaredDifference" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" + << node.getInputs().at(SquaredDifference::Input::LHS).value() << ", " + << node.getInputs().at(SquaredDifference::Input::RHS).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Squeeze &node) +{ + VERBOSE(LIR) 
<< "* Squeeze" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Slice &node) +{ + VERBOSE(LIR) << "* Slice" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const StridedSlice &node) +{ + VERBOSE(LIR) << "* StridedSlice" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Sub &node) +{ + VERBOSE(LIR) << "* Sub" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS).value() << ", " + << node.getInputs().at(Sub::Input::RHS).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Tanh &node) +{ + VERBOSE(LIR) << "* TanH" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const TopKV2 &node) +{ + VERBOSE(LIR) << "* TopKV2" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(TopKV2::Input::INPUT).value() << ")" + << std::endl; + VERBOSE(LIR) << " - Outputs : Values(" + << node.getOutputs().at(TopKV2::Output::OUTPUT_VALUES).value() << ") Indices(" + << node.getOutputs().at(TopKV2::Output::OUTPUT_INDICES).value() << ")" << std::endl; +} + +void Dumper::visit(const TransposeConv &node) +{ + std::string padding_type 
= + node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; + VERBOSE(LIR) << "* TransposeConv(" << padding_type << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : Output Shape(" + << node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE).value() << ") KERNEL(" + << node.getInputs().at(TransposeConv::Input::KERNEL).value() << ") IFM(" + << node.getInputs().at(TransposeConv::Input::INPUT).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Transpose &node) +{ + VERBOSE(LIR) << "* Transpose" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT).value() + << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Unpack &node) +{ + VERBOSE(LIR) << "* Unpack" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT).value() << ")" + << std::endl; + std::string outputs; + const auto &output_indices = node.getOutputs(); + for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) + { + outputs += std::to_string(it->value()); + if (std::next(it) != std::end(output_indices)) + outputs += ", "; + } + VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl; +} + +void Dumper::visit(const Min &node) +{ + VERBOSE(LIR) << "* Min" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Min::Input::LHS).value() << ", " + << node.getInputs().at(Min::Input::RHS).value() << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const Max &node) +{ + VERBOSE(LIR) << "* Max" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Max::Input::LHS).value() << ", " + << node.getInputs().at(Max::Input::RHS).value() << ")" << std::endl; 
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +void Dumper::visit(const OneHot &node) +{ + VERBOSE(LIR) << "* OneHot" << std::endl; + VERBOSE(LIR) << " - Inputs : " + << "Indices(" << node.getInputs().at(OneHot::Input::INDICES).value() << ") " + << "Depth(" << node.getInputs().at(OneHot::Input::DEPTH).value() << ") " + << "OnValue(" << node.getInputs().at(OneHot::Input::ON_VALUE).value() << ") " + << "OffValue(" << node.getInputs().at(OneHot::Input::OFF_VALUE).value() << ") " + << "Axis(" << node.param().axis << ") " << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl; +} + +} // namespace dumper +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/dumper/Dumper.h b/runtime/neurun/core/src/ir/dumper/Dumper.h new file mode 100644 index 000000000..458f1c81f --- /dev/null +++ b/runtime/neurun/core/src/ir/dumper/Dumper.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_GRAPH_DUMPER_H__ +#define __NEURUN_GRAPH_DUMPER_H__ + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace dumper +{ + +class Dumper : public OperationVisitor +{ +public: + Dumper() = default; + +public: + void visit(const operation::Abs &) override; + void visit(const operation::Add &node) override; + void visit(const operation::ArgMax &) override; + void visit(const operation::AvgPool2D &node) override; + void visit(const operation::Cast &) override; + void visit(const operation::Comparison &) override; + void visit(const operation::Concat &node) override; + void visit(const operation::Conv2D &node) override; + void visit(const operation::DepthToSpace &) override; + void visit(const operation::DepthwiseConv2D &node) override; + void visit(const operation::Dequantize &) override; + void visit(const operation::Div &) override; + void visit(const operation::EmbeddingLookup &) override; + void visit(const operation::Exp &) override; + void visit(const operation::Floor &) override; + void visit(const operation::FullyConnected &node) override; + void visit(const operation::Gather &) override; + void visit(const operation::HashtableLookup &) override; + void visit(const operation::InstanceNorm &) override; + void visit(const operation::L2Normalization &) override; + void visit(const operation::L2Pool2D &) override; + void visit(const operation::LocalResponseNormalization &) override; + void visit(const operation::LogicalAnd &) override; + void visit(const operation::LogicalNot &) override; + void visit(const operation::LogicalOr &) override; + void visit(const operation::Logistic &) override; + void visit(const operation::LSTM &) override; + void visit(const operation::MaxPool2D &node) override; + void visit(const operation::Mean &) override; + void visit(const operation::Mul &) override; + void visit(const operation::Neg &) override; + void visit(const operation::Pack &) override; + void visit(const 
operation::Permute &node) override; + void visit(const operation::PReLU &) override; + void visit(const operation::ReduceMax &) override; + void visit(const operation::ReduceMin &) override; + void visit(const operation::ReduceSum &) override; + void visit(const operation::ReLU &) override; + void visit(const operation::ReLU1 &) override; + void visit(const operation::ReLU6 &) override; + void visit(const operation::Reshape &node) override; + void visit(const operation::ResizeBilinear &) override; + void visit(const operation::RNN &) override; + void visit(const operation::RSQRT &) override; + void visit(const operation::Softmax &node) override; + void visit(const operation::SpaceToDepth &) override; + void visit(const operation::Split &) override; + void visit(const operation::SQRT &) override; + void visit(const operation::SquaredDifference &) override; + void visit(const operation::Squeeze &) override; + void visit(const operation::Slice &) override; + void visit(const operation::StridedSlice &) override; + void visit(const operation::Sub &) override; + void visit(const operation::Tanh &) override; + void visit(const operation::TopKV2 &) override; + void visit(const operation::TransposeConv &) override; + void visit(const operation::Transpose &) override; + void visit(const operation::Unpack &) override; + void visit(const operation::Min &) override; + void visit(const operation::Max &) override; + void visit(const operation::OneHot &) override; +}; + +} // namespace dumper +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_GRAPH_DUMPER_H__ diff --git a/runtime/neurun/core/src/ir/operand/Shape4DConvert.h b/runtime/neurun/core/src/ir/operand/Shape4DConvert.h new file mode 100644 index 000000000..feffee89f --- /dev/null +++ b/runtime/neurun/core/src/ir/operand/Shape4DConvert.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_GRAPH_OPERAND_SHAPE4D_CONVERT_H__ +#define __NEURUN_GRAPH_OPERAND_SHAPE4D_CONVERT_H__ + +#include "ir/operand/LowerInfo.h" + +namespace neurun +{ +namespace ir +{ +namespace operand +{ + +inline LowerInfo::Shape4D asShape4D(const Shape &shape) +{ + switch (shape.rank()) + { + case 0u: + return LowerInfo::Shape4D(1, 1, 1, 1); + + case 1u: + return LowerInfo::Shape4D(1, 1, 1, shape.dim(0)); + + case 2u: + return LowerInfo::Shape4D(1, 1, shape.dim(0), shape.dim(1)); + + case 3u: + return LowerInfo::Shape4D(1, shape.dim(0), shape.dim(1), shape.dim(2)); + + case 4u: + return LowerInfo::Shape4D(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3)); + + default: + throw "Unsupported rank > 4"; + } +} + +} // namespace operand +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_GRAPH_OPERAND_SHAPE4D_CONVERT_H__ diff --git a/runtime/neurun/core/src/ir/operation/Abs.cc b/runtime/neurun/core/src/ir/operation/Abs.cc new file mode 100644 index 000000000..9506f83d2 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Abs.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/Abs.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Abs::accept(OperationVisitor &v) const { v.visit(*this); } + +Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Add.cc b/runtime/neurun/core/src/ir/operation/Add.cc new file mode 100644 index 000000000..a7c40c37a --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Add.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Add.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Add::accept(OperationVisitor &v) const { v.visit(*this); } + +Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/ArgMax.cc b/runtime/neurun/core/src/ir/operation/ArgMax.cc new file mode 100644 index 000000000..200abc7dd --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/ArgMax.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/ArgMax.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); } + +ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/AvgPool2D.cc b/runtime/neurun/core/src/ir/operation/AvgPool2D.cc new file mode 100644 index 000000000..21ec052eb --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/AvgPool2D.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/AvgPool2D.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); } + +AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/BatchToSpaceND.cc b/runtime/neurun/core/src/ir/operation/BatchToSpaceND.cc new file mode 100644 index 000000000..042144c12 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/BatchToSpaceND.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/BatchToSpaceND.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void BatchToSpaceND::accept(OperationVisitor &v) const { v.visit(*this); } + +BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Cast.cc b/runtime/neurun/core/src/ir/operation/Cast.cc new file mode 100644 index 000000000..095225eca --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Cast.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Cast.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Cast::accept(OperationVisitor &v) const { v.visit(*this); } + +Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Comparison.cc b/runtime/neurun/core/src/ir/operation/Comparison.cc new file mode 100644 index 000000000..995d56764 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Comparison.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Comparison.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Comparison::accept(OperationVisitor &v) const { v.visit(*this); } + +Comparison::Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Concat.cc b/runtime/neurun/core/src/ir/operation/Concat.cc new file mode 100644 index 000000000..1772da1fc --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Concat.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Concat.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Concat::accept(OperationVisitor &v) const { v.visit(*this); } + +Concat::Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Conv2D.cc b/runtime/neurun/core/src/ir/operation/Conv2D.cc new file mode 100644 index 000000000..505e916a9 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Conv2D.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Conv2D.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); } + +Conv2D::Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Custom.cc b/runtime/neurun/core/src/ir/operation/Custom.cc new file mode 100644 index 000000000..67f36d588 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Custom.cc @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Custom.h" + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Custom::accept(OperationVisitor &v) const { v.visit(*this); } + +Custom::Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, std::string id, const Userdata &userdata) + : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata) +{ +} + +const std::string &Custom::id() const { return _id; } + +const Custom::Userdata &Custom::userdata() const { return _userdata; } + +Custom::~Custom() { delete[] _userdata.data; } + +std::string Custom::name() const { return id(); } + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/DepthToSpace.cc b/runtime/neurun/core/src/ir/operation/DepthToSpace.cc new file mode 100644 index 000000000..fd1d1f1aa --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/DepthToSpace.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/DepthToSpace.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void DepthToSpace::accept(OperationVisitor &v) const { v.visit(*this); } + +DepthToSpace::DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/DepthwiseConv2D.cc b/runtime/neurun/core/src/ir/operation/DepthwiseConv2D.cc new file mode 100644 index 000000000..ed76594a3 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/DepthwiseConv2D.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/DepthwiseConv2D.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void DepthwiseConv2D::accept(OperationVisitor &v) const { v.visit(*this); } + +DepthwiseConv2D::DepthwiseConv2D(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m) + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Dequantize.cc b/runtime/neurun/core/src/ir/operation/Dequantize.cc new file mode 100644 index 000000000..e99a59cb7 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Dequantize.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Dequantize.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); } + +Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Div.cc b/runtime/neurun/core/src/ir/operation/Div.cc new file mode 100644 index 000000000..484406ff3 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Div.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Div.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Div::accept(OperationVisitor &v) const { v.visit(*this); } + +Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/EmbeddingLookup.cc b/runtime/neurun/core/src/ir/operation/EmbeddingLookup.cc new file mode 100644 index 000000000..206e6bfaa --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/EmbeddingLookup.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/EmbeddingLookup.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void EmbeddingLookup::accept(OperationVisitor &v) const { v.visit(*this); } + +EmbeddingLookup::EmbeddingLookup(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Exp.cc b/runtime/neurun/core/src/ir/operation/Exp.cc new file mode 100644 index 000000000..3c0e0cf9b --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Exp.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Exp.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Exp::accept(OperationVisitor &v) const { v.visit(*this); } + +Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Floor.cc b/runtime/neurun/core/src/ir/operation/Floor.cc new file mode 100644 index 000000000..75373cd41 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Floor.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Floor.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Floor::accept(OperationVisitor &v) const { v.visit(*this); } + +Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/FullyConnected.cc b/runtime/neurun/core/src/ir/operation/FullyConnected.cc new file mode 100644 index 000000000..9560c0593 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/FullyConnected.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/FullyConnected.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); } + +FullyConnected::FullyConnected(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param &param) + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Gather.cc b/runtime/neurun/core/src/ir/operation/Gather.cc new file mode 100644 index 000000000..f98cef9ae --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Gather.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Gather.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Gather::accept(OperationVisitor &v) const { v.visit(*this); } + +Gather::Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/HashtableLookup.cc b/runtime/neurun/core/src/ir/operation/HashtableLookup.cc new file mode 100644 index 000000000..ecb9d3195 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/HashtableLookup.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/HashtableLookup.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void HashtableLookup::accept(OperationVisitor &v) const { v.visit(*this); } + +HashtableLookup::HashtableLookup(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(3u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/InstanceNorm.cc b/runtime/neurun/core/src/ir/operation/InstanceNorm.cc new file mode 100644 index 000000000..69e47abd4 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/InstanceNorm.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/InstanceNorm.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void InstanceNorm::accept(OperationVisitor &v) const { v.visit(*this); } + +InstanceNorm::InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/L2Normalization.cc b/runtime/neurun/core/src/ir/operation/L2Normalization.cc new file mode 100644 index 000000000..67085989e --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/L2Normalization.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/L2Normalization.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void L2Normalization::accept(OperationVisitor &v) const { v.visit(*this); } + +L2Normalization::L2Normalization(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/L2Pool2D.cc b/runtime/neurun/core/src/ir/operation/L2Pool2D.cc new file mode 100644 index 000000000..0815cb5ab --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/L2Pool2D.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/L2Pool2D.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); } + +L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/LSTM.cc b/runtime/neurun/core/src/ir/operation/LSTM.cc new file mode 100644 index 000000000..58e2aa32e --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/LSTM.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/LSTM.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void LSTM::accept(OperationVisitor &v) const { v.visit(*this); } + +LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(23u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/LocalResponseNormalization.cc b/runtime/neurun/core/src/ir/operation/LocalResponseNormalization.cc new file mode 100644 index 000000000..dcba7f1cb --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/LocalResponseNormalization.cc @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/LocalResponseNormalization.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void LocalResponseNormalization::accept(OperationVisitor &v) const { v.visit(*this); } + +LocalResponseNormalization::LocalResponseNormalization(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/LogicalAnd.cc b/runtime/neurun/core/src/ir/operation/LogicalAnd.cc new file mode 100644 index 000000000..51f4f0ee0 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/LogicalAnd.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/LogicalAnd.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); } + +LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/LogicalNot.cc b/runtime/neurun/core/src/ir/operation/LogicalNot.cc new file mode 100644 index 000000000..48c25142a --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/LogicalNot.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/LogicalNot.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); } + +LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/LogicalOr.cc b/runtime/neurun/core/src/ir/operation/LogicalOr.cc new file mode 100644 index 000000000..663b7deb5 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/LogicalOr.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/LogicalOr.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); } + +LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Logistic.cc b/runtime/neurun/core/src/ir/operation/Logistic.cc new file mode 100644 index 000000000..3ed2f3453 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Logistic.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Logistic.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Logistic::accept(OperationVisitor &v) const { v.visit(*this); } + +Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/LowerInfo.cc b/runtime/neurun/core/src/ir/operation/LowerInfo.cc new file mode 100644 index 000000000..6133be3f8 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/LowerInfo.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/LowerInfo.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +LowerInfo::LowerInfo(const backend::Backend *backend, Layout layout) + : _permute_factor{backend, layout} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Max.cc b/runtime/neurun/core/src/ir/operation/Max.cc new file mode 100644 index 000000000..be4bdd365 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Max.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/Max.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Max::accept(OperationVisitor &v) const { v.visit(*this); } + +Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/MaxPool2D.cc b/runtime/neurun/core/src/ir/operation/MaxPool2D.cc new file mode 100644 index 000000000..8f1b70cd6 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/MaxPool2D.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/MaxPool2D.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); } + +MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Mean.cc b/runtime/neurun/core/src/ir/operation/Mean.cc new file mode 100644 index 000000000..016b5dd85 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Mean.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Mean.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Mean::accept(OperationVisitor &v) const { v.visit(*this); } + +Mean::Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Min.cc b/runtime/neurun/core/src/ir/operation/Min.cc new file mode 100644 index 000000000..a864405dc --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Min.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Min.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Min::accept(OperationVisitor &v) const { v.visit(*this); } + +Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Mul.cc b/runtime/neurun/core/src/ir/operation/Mul.cc new file mode 100644 index 000000000..0b2d67a9d --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Mul.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Mul.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Mul::accept(OperationVisitor &v) const { v.visit(*this); } + +Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Neg.cc b/runtime/neurun/core/src/ir/operation/Neg.cc new file mode 100644 index 000000000..65922d57c --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Neg.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Neg.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Neg::accept(OperationVisitor &v) const { v.visit(*this); } + +Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/OneHot.cc b/runtime/neurun/core/src/ir/operation/OneHot.cc new file mode 100644 index 000000000..0ba3c9d60 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/OneHot.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/OneHot.h" +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void OneHot::accept(OperationVisitor &v) const { v.visit(*this); } + +OneHot::OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/PReLU.cc b/runtime/neurun/core/src/ir/operation/PReLU.cc new file mode 100644 index 000000000..b8555ccbd --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/PReLU.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/PReLU.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void PReLU::accept(OperationVisitor &v) const { v.visit(*this); } + +PReLU::PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Pack.cc b/runtime/neurun/core/src/ir/operation/Pack.cc new file mode 100644 index 000000000..412c744ea --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Pack.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "ir/operation/Pack.h" +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ +void Pack::accept(OperationVisitor &v) const { v.visit(*this); } +Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createAtLeast(3u), inputs, outputs}, _param{param} +{ +} +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Pad.cc b/runtime/neurun/core/src/ir/operation/Pad.cc new file mode 100644 index 000000000..a08be12a6 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Pad.cc @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Pad.h" + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Pad::accept(OperationVisitor &v) const { v.visit(*this); } + +Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Permute.cc b/runtime/neurun/core/src/ir/operation/Permute.cc new file mode 100644 index 000000000..ec3d969c8 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Permute.cc @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Permute.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Permute::accept(OperationVisitor &v) const { v.visit(*this); } + +Permute::Permute(const OperandIndex &input, const OperandIndex &output, + const backend::BackendContext *input_backend_ctx, + const backend::BackendContext *output_backend_ctx, Type type, DataType data_type) + : Operation{OperandConstraint::createExact(1u)}, _param{input_backend_ctx, output_backend_ctx}, + _type{type}, _dataType{data_type} +{ + setInputs({input}); + setOutputs({output}); +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/RNN.cc b/runtime/neurun/core/src/ir/operation/RNN.cc new file mode 100644 index 000000000..8db5cbceb --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/RNN.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/RNN.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void RNN::accept(OperationVisitor &v) const { v.visit(*this); } + +RNN::RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/RSQRT.cc b/runtime/neurun/core/src/ir/operation/RSQRT.cc new file mode 100644 index 000000000..ec13b20ec --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/RSQRT.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/RSQRT.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); } + +RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/ReLU.cc b/runtime/neurun/core/src/ir/operation/ReLU.cc new file mode 100644 index 000000000..6b3f7e72d --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/ReLU.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/ReLU.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void ReLU::accept(OperationVisitor &v) const { v.visit(*this); } + +ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/ReLU1.cc b/runtime/neurun/core/src/ir/operation/ReLU1.cc new file mode 100644 index 000000000..d7b4e1b11 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/ReLU1.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/ReLU1.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); } + +ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/ReLU6.cc b/runtime/neurun/core/src/ir/operation/ReLU6.cc new file mode 100644 index 000000000..245eb923f --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/ReLU6.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/ReLU6.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); } + +ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/ReduceMax.cc b/runtime/neurun/core/src/ir/operation/ReduceMax.cc new file mode 100644 index 000000000..b7ef2c5a9 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/ReduceMax.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/ReduceMax.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void ReduceMax::accept(OperationVisitor &v) const { v.visit(*this); } + +ReduceMax::ReduceMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/ReduceMin.cc b/runtime/neurun/core/src/ir/operation/ReduceMin.cc new file mode 100644 index 000000000..84d7e0cc5 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/ReduceMin.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/ReduceMin.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void ReduceMin::accept(OperationVisitor &v) const { v.visit(*this); } + +ReduceMin::ReduceMin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/ReduceSum.cc b/runtime/neurun/core/src/ir/operation/ReduceSum.cc new file mode 100644 index 000000000..7e3b19bd6 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/ReduceSum.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/ReduceSum.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void ReduceSum::accept(OperationVisitor &v) const { v.visit(*this); } + +ReduceSum::ReduceSum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Reshape.cc b/runtime/neurun/core/src/ir/operation/Reshape.cc new file mode 100644 index 000000000..bae37e12f --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Reshape.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Reshape.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Reshape::accept(OperationVisitor &v) const { v.visit(*this); } + +Reshape::Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/ResizeBilinear.cc b/runtime/neurun/core/src/ir/operation/ResizeBilinear.cc new file mode 100644 index 000000000..55ae4815d --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/ResizeBilinear.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/ResizeBilinear.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void ResizeBilinear::accept(OperationVisitor &v) const { v.visit(*this); } + +ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/SQRT.cc b/runtime/neurun/core/src/ir/operation/SQRT.cc new file mode 100644 index 000000000..6c6daa3a0 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/SQRT.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/SQRT.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void SQRT::accept(OperationVisitor &v) const { v.visit(*this); } + +SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Slice.cc b/runtime/neurun/core/src/ir/operation/Slice.cc new file mode 100644 index 000000000..88014d1e4 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Slice.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Slice.h" +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Slice::accept(OperationVisitor &v) const { v.visit(*this); } + +Slice::Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Softmax.cc b/runtime/neurun/core/src/ir/operation/Softmax.cc new file mode 100644 index 000000000..6b3a6b164 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Softmax.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Softmax.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Softmax::accept(OperationVisitor &v) const { v.visit(*this); } + +Softmax::Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/SpaceToBatchND.cc b/runtime/neurun/core/src/ir/operation/SpaceToBatchND.cc new file mode 100644 index 000000000..a07453504 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/SpaceToBatchND.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/SpaceToBatchND.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void SpaceToBatchND::accept(OperationVisitor &v) const { v.visit(*this); } + +SpaceToBatchND::SpaceToBatchND(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(3u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/SpaceToDepth.cc b/runtime/neurun/core/src/ir/operation/SpaceToDepth.cc new file mode 100644 index 000000000..ca16bd92f --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/SpaceToDepth.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/SpaceToDepth.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void SpaceToDepth::accept(OperationVisitor &v) const { v.visit(*this); } + +SpaceToDepth::SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Split.cc b/runtime/neurun/core/src/ir/operation/Split.cc new file mode 100644 index 000000000..a4b15a9b2 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Split.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "ir/operation/Split.h" +#include <cassert> +#include "ir/OperationVisitor.h" +namespace neurun +{ +namespace ir +{ +namespace operation +{ +void Split::accept(OperationVisitor &v) const { v.visit(*this); } +Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/SquaredDifference.cc b/runtime/neurun/core/src/ir/operation/SquaredDifference.cc new file mode 100644 index 000000000..141fb7560 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/SquaredDifference.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/SquaredDifference.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void SquaredDifference::accept(OperationVisitor &v) const { v.visit(*this); } + +SquaredDifference::SquaredDifference(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Squeeze.cc b/runtime/neurun/core/src/ir/operation/Squeeze.cc new file mode 100644 index 000000000..22ee5763d --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Squeeze.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Squeeze.h" +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Squeeze::accept(OperationVisitor &v) const { v.visit(*this); } + +Squeeze::Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param) +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/StridedSlice.cc b/runtime/neurun/core/src/ir/operation/StridedSlice.cc new file mode 100644 index 000000000..f764dccc0 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/StridedSlice.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/StridedSlice.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void StridedSlice::accept(OperationVisitor &v) const { v.visit(*this); } + +StridedSlice::StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Sub.cc b/runtime/neurun/core/src/ir/operation/Sub.cc new file mode 100644 index 000000000..7d83e3d74 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Sub.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Sub.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Sub::accept(OperationVisitor &v) const { v.visit(*this); } + +Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Tanh.cc b/runtime/neurun/core/src/ir/operation/Tanh.cc new file mode 100644 index 000000000..a3125e947 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Tanh.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Tanh.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Tanh::accept(OperationVisitor &v) const { v.visit(*this); } + +Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(1u), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/TopKV2.cc b/runtime/neurun/core/src/ir/operation/TopKV2.cc new file mode 100644 index 000000000..6fabd34a3 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/TopKV2.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/TopKV2.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void TopKV2::accept(OperationVisitor &v) const { v.visit(*this); } + +TopKV2::TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Transpose.cc b/runtime/neurun/core/src/ir/operation/Transpose.cc new file mode 100644 index 000000000..74239b0f6 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Transpose.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Transpose.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void Transpose::accept(OperationVisitor &v) const { v.visit(*this); } + +Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/TransposeConv.cc b/runtime/neurun/core/src/ir/operation/TransposeConv.cc new file mode 100644 index 000000000..30664e974 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/TransposeConv.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/TransposeConv.h" + +#include <cassert> + +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ + +void TransposeConv::accept(OperationVisitor &v) const { v.visit(*this); } + +TransposeConv::TransposeConv(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m) + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/operation/Unpack.cc b/runtime/neurun/core/src/ir/operation/Unpack.cc new file mode 100644 index 000000000..7c2c24892 --- /dev/null +++ b/runtime/neurun/core/src/ir/operation/Unpack.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "ir/operation/Unpack.h" +#include "ir/OperationVisitor.h" + +namespace neurun +{ +namespace ir +{ +namespace operation +{ +void Unpack::accept(OperationVisitor &v) const { v.visit(*this); } +Unpack::Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} +} // namespace operation +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.cc b/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.cc new file mode 100644 index 000000000..8f8ebff1b --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConstantInsertionPass.h" + +#include "backend/Backend.h" +#include <ir/Graph.h> +#include "ir/operand/Shape4DConvert.h" +#include <util/Utils.h> + +namespace neurun +{ +namespace ir +{ +namespace pass +{ + +void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node) +{ + const auto &subgraph_index = _graph.subgraphs().getOperation(node_index); + const auto subg_lower_info = _graph.getLowerInfo(subgraph_index); + const auto backend = subg_lower_info->backend(); + const auto layout = subg_lower_info->layout(); + const auto factor = operand::PermuteFactor{backend, layout}; + + for (const auto input : node.getInputs()) + { + auto &object = _graph.operands().at(input); + + if (object.isConstant()) + { + const auto key = ReplaceKey{input, factor}; + if (_replace_operands_map.count(key) == 0) + { + auto new_object = object; + // TODO Remove const_case + const_cast<std::list<OperationIndex> &>(new_object.getDef().list()).clear(); + const_cast<std::list<OperationIndex> &>(new_object.getUses().list()).clear(); + const auto new_index = _graph.operands().emplace(new_object); + _replace_operands_map[key] = new_index; + + _graph.setLowerInfo(new_index, nnfw::cpp14::make_unique<operand::LowerInfo>( + operand::asShape4D(new_object.shape()))); + _graph.getLowerInfo(new_index)->addDefPermuteFactor(factor); + } + + const auto replaced_input = _replace_operands_map[key]; + // Update op_seq + if (_graph.subgraphs().at(subgraph_index).getInputs().contains(input)) + { + _graph.subgraphs().at(subgraph_index).replaceInput(input, replaced_input); + } + + // Update node + node.replaceInput(input, replaced_input); + + // Update operand + auto &replaced_object = _graph.operands().at(replaced_input); + replaced_object.appendUse(node_index); + + // Update lower_info + auto replaced_lower_info = _graph.getLowerInfo(replaced_input); + replaced_lower_info->addUsePermuteFactor(factor); + + // Remove this node from def and uses of origin operand + if 
(object.getDef().contains(node_index)) + { + object.removeDef(node_index); + } + object.removeUse(node_index); + + // Remove origin operand + if (object.getDef().size() == 0 && object.getUses().size() == 0) + { + _graph.removeOperand(input); + _graph.removeLowerInfo(input); + } + } + } + + // Now this runtime does not support the node making output as constant + for (const auto &output : node.getOutputs()) + { + UNUSED_RELEASE(output); + assert(!_graph.operands().at(output).isConstant()); + } +} + +} // namespace pass +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.h b/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.h new file mode 100644 index 000000000..40476b20e --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ +#define __NEURUN_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ + +#include <ir/operand/PermuteFactor.h> +#include <ir/Index.h> +#include "OperationPass.h" +#include <unordered_map> +#include <utility> + +namespace neurun +{ +namespace ir +{ +namespace pass +{ + +class ConstantInsertionPass : public OperationPass +{ +public: + using OperationPass::OperationPass; + +public: + std::string id() final { return "ConstantInsertionPass"; } + +public: + void callback(const OperationIndex &index, Operation &node) final; + +private: + struct ReplaceKey + { + OperandIndex index; + operand::PermuteFactor factor; + + bool operator==(const ReplaceKey &other) const + { + return index == other.index && factor == other.factor; + } + }; + + /** + * @brief Structure that provides hash function of ReplaceKey + */ + struct KeyHasher + { + std::size_t operator()(const ReplaceKey &key) const noexcept + { + using std::hash; + return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1); + } + }; + + std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map; +}; + +} // namespace pass +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ diff --git a/runtime/neurun/core/src/ir/pass/OperandPass.cc b/runtime/neurun/core/src/ir/pass/OperandPass.cc new file mode 100644 index 000000000..f31d0d850 --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/OperandPass.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OperandPass.h" + +#include "ir/Graph.h" + +namespace neurun +{ +namespace ir +{ +namespace pass +{ + +void OperandPass::run() +{ + _graph.operands().iterate( + [&](const OperandIndex &index, Operand &object) { callback(index, object); }); +} + +} // namespace pass +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/pass/OperandPass.h b/runtime/neurun/core/src/ir/pass/OperandPass.h new file mode 100644 index 000000000..c9fbf541d --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/OperandPass.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_GRAPH_PASS_OPERAND_PASS_H__ +#define __NEURUN_GRAPH_PASS_OPERAND_PASS_H__ + +#include "Pass.h" +#include "ir/Index.h" + +namespace neurun +{ +namespace ir +{ +class Operand; +} // namespace ir +} // namespace neurun + +namespace neurun +{ +namespace ir +{ +namespace pass +{ + +class OperandPass : public Pass +{ +public: + using Pass::Pass; + +public: + std::string id() override = 0; + void run() override final; + virtual void callback(const OperandIndex &i, Operand &o) = 0; +}; + +} // namespace pass +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_GRAPH_PASS_OPERAND_PASS_H__ diff --git a/runtime/neurun/core/src/ir/pass/OperationPass.cc b/runtime/neurun/core/src/ir/pass/OperationPass.cc new file mode 100644 index 000000000..c9438ee39 --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/OperationPass.cc @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "OperationPass.h" + +#include "ir/Index.h" +#include "ir/Operation.h" +#include "ir/Graph.h" + +namespace neurun +{ +namespace ir +{ +namespace pass +{ + +void OperationPass::run() +{ + _graph.operations().iterate( + [&](const OperationIndex &index, Operation &node) { callback(index, node); }); +} + +} // namespace pass +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/pass/OperationPass.h b/runtime/neurun/core/src/ir/pass/OperationPass.h new file mode 100644 index 000000000..4b7de7109 --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/OperationPass.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file OperationPass.h + * @brief This file contains OperationPass class + */ + +#ifndef __NEURUN_GRAPH_PASS_OPERATION_PASS_H__ +#define __NEURUN_GRAPH_PASS_OPERATION_PASS_H__ + +#include "Pass.h" +#include "ir/Index.h" + +namespace neurun +{ +namespace ir +{ +class Operation; +} // namespace ir +} // namespace neurun + +namespace neurun +{ +namespace ir +{ +namespace pass +{ + +/** + * @brief Class to iterate over operations and calls callback() method + */ +class OperationPass : public Pass +{ +public: + using Pass::Pass; + +public: + /** + * @brief Returns string id for this pass. Same with class name. 
+ * + * @return string id + */ + std::string id() override = 0; + + /** + * @brief Be called for all nodes of graph. + * @param index is the index of a node in graph + * @param node is the node in graph + */ + virtual void callback(const OperationIndex &index, Operation &node) = 0; + + /** + * @brief Run the pass + */ + void run() final; +}; + +} // namespace pass +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_GRAPH_PASS_OPERATION_PASS_H__ diff --git a/runtime/neurun/core/src/ir/pass/Pass.h b/runtime/neurun/core/src/ir/pass/Pass.h new file mode 100644 index 000000000..0aa0f36a6 --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/Pass.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_GRAPH_PASS_PASS_H__ +#define __NEURUN_GRAPH_PASS_PASS_H__ + +#include <string> + +namespace neurun +{ +namespace ir +{ +class Graph; +} // namespace ir +} // namespace neurun + +namespace neurun +{ +namespace ir +{ +namespace pass +{ + +class Pass +{ +public: + Pass(Graph &graph) : _graph{graph} {} + virtual ~Pass() = default; + +public: + virtual std::string id() = 0; + virtual void run() = 0; + +protected: + Graph &_graph; +}; + +} // namespace pass +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_GRAPH_PASS_PASS_H__ diff --git a/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.cc b/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.cc new file mode 100644 index 000000000..71f3d7e82 --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.cc @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PermutationEliminationPass.h" + +#include "ir/Operand.h" +#include "ir/operand/LowerInfo.h" +#include "ir/Graph.h" +#include "backend/IConfig.h" +#include "util/logging.h" +#include "compiler/BackendResolver.h" + +namespace neurun +{ +namespace ir +{ +namespace pass +{ +void PermutationEliminationPass::callback(const OperandIndex &inp_index, Operand &object) +{ + if (_graph.getInputs().contains(inp_index)) + { + eliminateInput(inp_index, object); + } + else if (_graph.getOutputs().contains(inp_index)) + { + eliminateOutput(inp_index, object); + } +} + +void PermutationEliminationPass::eliminateInput(const OperandIndex &inp_index, Operand &object) +{ + auto &model_inputs = _graph.getInputs(); + + // get uses of the model's given input + auto uses = object.getUses(); + + // input must be used just by permutation + if (uses.size() != 1) + { + return; + } + + for (auto input_use : uses.list()) + { + auto &perm_operation = _graph.operations().at(input_use); + auto perm_inputs = perm_operation.getInputs(); + + auto perm_outputs = perm_operation.getOutputs(); + + if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, true)) + { + return; + } + + assert(perm_inputs.at(0) == inp_index); + + VERBOSE(PermutationEliminationPass::EliminateInput) << "remove NHWC_TO_NCHW permutation\n"; + + // set model's new input, which was output of permutation + model_inputs.replace(inp_index, perm_outputs.at(0)); + + // remove model's input, which is also input of permutation + _graph.removeOperand(inp_index); + + // remove permutation operation + assert(_graph.subgraphs().containsOperation(input_use)); + auto subg_idx = _graph.subgraphs().getOperation(input_use); + _graph.subgraphs().remove(subg_idx); + _graph.operations().remove(input_use); + + VERBOSE(PermutationEliminationPass::EliminateInput) + << inp_index.value() << " is model's input and is removed. 
New input is " + << perm_outputs.at(0).value() << "\n" + << input_use.value() << " is removed permutation operation\n"; + } +} + +void PermutationEliminationPass::eliminateOutput(const OperandIndex &out_index, Operand &object) +{ + auto &model_outputs = _graph.getOutputs(); + + // get defs of the model's given output + auto defs = object.getDef(); + + // output must use just permutation + if (defs.size() != 1) + { + return; + } + + for (auto output_def : defs.list()) + { + auto &perm_operation = _graph.operations().at(output_def); + auto perm_outputs = perm_operation.getOutputs(); + + auto perm_inputs = perm_operation.getInputs(); + if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, false)) + { + return; + } + + assert(perm_outputs.at(0) == out_index); + + VERBOSE(PermutationEliminationPass::EliminateOutput) << "remove NCHW_TO_NHWC permutation\n"; + + // Update operations' output that is used by permute operand + for (auto perm_input_index : perm_inputs) + { + auto &perm_input_operand = _graph.operands().at(perm_input_index); + perm_input_operand.removeUse(output_def); + } + + // set model's new output, which was input of permutation + model_outputs.replace(out_index, perm_inputs.at(0)); + + // remove model's output, which is also output of permutation + _graph.removeOperand(out_index); + + // remove permutation operation + assert(_graph.subgraphs().containsOperation(output_def)); + auto subg_idx = _graph.subgraphs().getOperation(output_def); + _graph.subgraphs().remove(subg_idx); + _graph.operations().remove(output_def); + + VERBOSE(PermutationEliminationPass::EliminateOutput) + << out_index.value() << " is model's output and is removed. 
New output is " + << perm_inputs.at(0).value() << "\n" + << output_def.value() << " is removed permutation operation\n"; + } +} + +bool PermutationEliminationPass::isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes, + const OperandIndexSequence &out_indexes, + bool is_for_model_input) +{ + auto input_def_factors = _graph.getLowerInfo(inp_indexes.at(0))->def_factors(); + auto output_def_factors = _graph.getLowerInfo(out_indexes.at(0))->def_factors(); + + auto input_layout = input_def_factors.getOnlyElement().layout(); + auto output_layout = output_def_factors.getOnlyElement().layout(); + + if (input_def_factors.size() != 1 || output_def_factors.size() != 1) + { + return false; + } + + // all operands' factor must be the same + for (auto index : inp_indexes) + { + auto op_factor_set = _graph.getLowerInfo(index)->def_factors(); + if (op_factor_set.size() != 1 || + input_layout != _graph.getLowerInfo(index)->def_factors().getOnlyElement().layout()) + { + return false; + } + } + // all operands' factor must be the same + for (auto index : out_indexes) + { + auto op_factor_set = _graph.getLowerInfo(index)->def_factors(); + if (op_factor_set.size() != 1 || + output_layout != _graph.getLowerInfo(index)->def_factors().getOnlyElement().layout()) + { + return false; + } + } + + if (is_for_model_input) + { + // check if this is NHWC_TO_NCHW permutation: must have single input, which is model's input + return (inp_indexes.size() == 1 && input_layout == Layout::NHWC && + output_layout == Layout::NCHW); + } + + // check if this is NCHW_TO_NHWC permutation: must have single output, which is model's output + return (out_indexes.size() == 1 && input_layout == Layout::NCHW && output_layout == Layout::NHWC); +} + +} // namespace pass +} // namespace ir +} // namespace neurun diff --git a/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.h b/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.h new file mode 100644 index 000000000..4431eabbc --- /dev/null 
+++ b/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ +#define __NEURUN_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ + +#include "OperandPass.h" +#include "ir/Operand.h" +#include "ir/OperandIndexSequence.h" + +namespace neurun +{ +namespace ir +{ +namespace pass +{ + +class PermutationEliminationPass : public OperandPass +{ +public: + using OperandPass::OperandPass; + +public: + std::string id() override { return "PermutationEliminationPass"; } + + void callback(const OperandIndex &index, Operand &object) override; + +private: + /** + * @brief Remove Permute operation that permutates input + * + * Note: This function aslo removes model's input and + * sets output of permutation as model's new input + * + * @param inp_index is the target operand index for the elimination + * @param object is the target operand object for the elimination + * + * @return + */ + void eliminateInput(const OperandIndex &inp_index, Operand &object); + + /** + * @brief Remove Permute operation that permutates output of a model + * + * Note: This function aslo removes model's output and + * sets input of permutation as model's new output + * + * @param out_index is the target operand index for the elimination + * @param object is the target operand object for the 
elimination + * + * @return + */ + void eliminateOutput(const OperandIndex &out_index, Operand &object); + + /** + * @brief Determine if passed operands are permute layer's input and output, that must be + * eliminated + * + * @param inp_index indexes of the input operand to operation + * @param out_index indexes of the output operand to operation + * @param is_for_model_input checking for model's input or output + * + * @return if it is permutation layer + */ + bool isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes, + const OperandIndexSequence &out_indexes, bool is_for_model_input); +}; + +} // namespace pass +} // namespace ir +} // namespace neurun + +#endif // __NEURUN_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ diff --git a/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.cc b/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.cc new file mode 100644 index 000000000..052e3026a --- /dev/null +++ b/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.cc @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "PermutationInsertionPass.h"
+
+#include <cassert>
+#include <utility>
+#include <unordered_map>
+
+#include "ir/Operand.h"
+#include "ir/operation/LowerInfo.h"
+#include "ir/Graph.h"
+#include "backend/IConfig.h"
+#include "util/logging.h"
+#include "cpp14/memory.h"
+#include "ir/operation/Permute.h"
+#include "ir/operand/Shape4DConvert.h"
+#include "compiler/BackendResolver.h"
+
+namespace neurun
+{
+namespace ir
+{
+namespace pass
+{
+
+// Per-operand pass entry point. If the operand is consumed with permute
+// factors (backend + layout pairs) other than the single factor it is defined
+// with, insert one Permute operation per missing factor and rewire every
+// non-Permute consumer to the freshly permuted operand.
+void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object)
+{
+  auto &&operand_li = _graph.getLowerInfo(index);
+  assert(operand_li);
+
+  // NOTE Later, constants also will have Def
+  // Ignore constants
+  if (operand_li->def_factors().size() == 0)
+  {
+    return;
+  }
+
+  std::list<OperationIndex> permute_indexes;
+
+  // Build a map for all necessary type of operands
+  // Maps each required PermuteFactor to the operand index that satisfies it:
+  // the original operand for its def factor, a permuted copy for every other
+  // use factor.
+  std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index;
+  {
+    // Exactly one def factor is assumed here; multiple defining backends are
+    // not supported by this pass.
+    assert(operand_li->def_factors().size() == 1);
+    for (auto factor : operand_li->def_factors())
+    {
+      factor_to_index.emplace(factor, index);
+    }
+
+    // Factors that are used but not produced -> need a Permute each
+    auto insert_set = operand_li->use_factors() - operand_li->def_factors();
+    for (auto factor : insert_set)
+    {
+      const auto permute_operation_index = insertPermute(index, factor);
+      permute_indexes.push_back(permute_operation_index);
+      VERBOSE(PermutationInsertionPass) << "Insert 'Permute' operation for operand "
+                                        << index.value() << std::endl;
+      const auto &permute_operation = _graph.operations().at(permute_operation_index);
+      const auto permuted_operand_index = permute_operation.getOutputs().at(0);
+      factor_to_index.emplace(factor, permuted_operand_index);
+    }
+  }
+
+  // Update operations' input that uses this operand
+  {
+    std::list<OperationIndex> remove_list;
+
+    auto uses = object.getUses();
+    for (auto use : uses.list())
+    {
+      // If permute operation, ignore it
+      if (std::find(permute_indexes.begin(), permute_indexes.end(), use) != permute_indexes.end())
+        continue;
+
+      auto &operation = _graph.operations().at(use);
+      assert(_graph.subgraphs().containsOperation(use));
+      auto subg_index = _graph.subgraphs().getOperation(use);
+      auto subg_li = _graph.getLowerInfo(subg_index);
+      assert(subg_li);
+      const auto subg_layout = subg_li->layout();
+      const backend::Backend *backend = subg_li->backend();
+      assert(backend);
+      auto use_node_inputs = operation.getInputs();
+      assert(use_node_inputs.contains(index));
+
+      // Pick the operand variant matching this consumer's (backend, layout)
+      auto new_index = factor_to_index.at({backend, subg_layout});
+      if (index != new_index)
+      {
+        // Update from op_seq
+        _graph.subgraphs().at(subg_index).replaceInput(index, new_index);
+
+        // Update from operation
+        operation.replaceInput(index, new_index);
+
+        // Update from operand
+        remove_list.push_back(
+            use); // Removal should be done in another loop since we are in the loop
+        _graph.operands().at(new_index).appendUse(use);
+      }
+    }
+
+    // Deferred removal: mutating the use-list while iterating over it above
+    // would invalidate the traversal.
+    for (auto &operation : remove_list)
+    {
+      object.removeUse(operation);
+    }
+  }
+}
+
+// Creates a new output operand plus a Permute operation (and its own op_seq)
+// converting operand_index into `factor`'s backend/layout, and patches the
+// lower-info and use/def bookkeeping accordingly. Returns the index of the
+// inserted Permute operation.
+OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index,
+                                                       const operand::PermuteFactor &factor)
+{
+  assert(!_graph.isBuildingPhase());
+
+  auto &operand = _graph.operands().at(operand_index);
+
+  // Generate output operand and permute operation
+  auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
+  // change model output if operand_index is model output index
+  auto &model_outputs = _graph.getOutputs();
+  if (model_outputs.contains(operand_index))
+  {
+    model_outputs.replace(operand_index, out_operand_index);
+  }
+
+  // Find Permute information
+  auto input_backend = _graph.getLowerInfo(operand_index)->def_factors().getOnlyElement().backend();
+  auto output_backend = factor.backend();
+  // NOTE Permute may not have specific layout because the layout of input and output may be
+  // different.
+  const auto permute_node_layout = Layout::UNKNOWN;
+  const auto permute_node_backend = backend::BackendManager::get().getDefault();
+  const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+
+  // Update LowerInfo of input operand
+  auto operand_lower_info = _graph.getLowerInfo(operand_index);
+  operand_lower_info->removeUsePermuteFactor(factor);
+  operand_lower_info->addUsePermuteFactor(permute_node_factor);
+
+  // Update LowerInfo of output operand
+  auto out_operand_li =
+      nnfw::cpp14::make_unique<operand::LowerInfo>(operand::asShape4D(operand.shape()));
+
+  // The input and output factors of all nodes will be the same except Permute. So Tensor's
+  // allocators allocates memory using only the information of def permutation factor now.
+  // TODO Change param to permute_node_factor
+  out_operand_li->addDefPermuteFactor(factor);
+  out_operand_li->addUsePermuteFactor(factor);
+  _graph.setLowerInfo(out_operand_index, std::move(out_operand_li));
+
+  auto input_backend_ctx = _graph.backend_resolver()->getBackendContext(input_backend);
+  auto output_backend_ctx = _graph.backend_resolver()->getBackendContext(output_backend);
+
+  // Insert permute operation to the graph
+  const auto input_layout =
+      _graph.getLowerInfo(operand_index)->def_factors().getOnlyElement().layout();
+  const auto output_layout = factor.layout();
+  using Permute = operation::Permute;
+  // Choose the permute flavor from the layout pair; anything other than the
+  // two NHWC<->NCHW conversions degenerates to a plain copy.
+  const auto permute_type = [&]() {
+    if (input_layout == Layout::NHWC && output_layout == Layout::NCHW)
+    {
+      return Permute::Type::NHWC_TO_NCHW;
+    }
+    else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC)
+    {
+      return Permute::Type::NCHW_TO_NHWC;
+    }
+    else
+    {
+      return Permute::Type::COPY;
+    }
+  }();
+  auto insert_node = nnfw::cpp14::make_unique<Permute>(
+      operand_index, out_operand_index, input_backend_ctx, output_backend_ctx, permute_type);
+
+  auto node_index = _graph.operations().push(std::move(insert_node));
+  const auto &node = _graph.operations().at(node_index);
+
+  // OpSequence
+  // The Permute gets its own single-operation op_seq lowered to the default
+  // backend with UNKNOWN layout (see permute_node_factor above).
+  {
+    auto subg_index = _graph.subgraphs().emplace(node_index, node, permute_node_layout);
+    auto &subg = _graph.subgraphs().at(subg_index);
+    subg.setInputs(node.getInputs());
+    subg.setOutputs(node.getOutputs());
+    _graph.setLowerInfo(subg_index, nnfw::cpp14::make_unique<operation::LowerInfo>(
+                                        permute_node_backend, permute_node_layout));
+  }
+
+  // Update Use/Def info
+  {
+    _graph.operands().at(operand_index).appendUse(node_index);
+    _graph.operands().at(out_operand_index).appendDef(node_index);
+  }
+  return node_index;
+}
+} // namespace pass
+} // namespace ir
+} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.h b/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.h
new file mode 100644
index 000000000..4065fc6ac
--- /dev/null
+++ b/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#define __NEURUN_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+
+#include "OperandPass.h"
+#include "ir/Operand.h" // for Operand (also brings in the index types used below)
+#include "backend/BackendManager.h"
+#include "ir/operand/PermuteFactor.h"
+
+namespace neurun
+{
+namespace ir
+{
+namespace pass
+{
+
+/**
+ * @brief Operand pass that inserts Permute operations wherever an operand is
+ *        consumed with a backend/layout combination different from the one it
+ *        is produced with.
+ */
+class PermutationInsertionPass : public OperandPass
+{
+public:
+  using OperandPass::OperandPass;
+
+public:
+  std::string id() override { return "PermutationInsertionPass"; }
+  void callback(const OperandIndex &index, Operand &object) override;
+
+  /**
+   * @brief Insert Permute operation that has given operand as input
+   *
+   * @param operand_index is the target operand index for the insertion
+   * @param factor is the output operand's backend type and layout
+   *
+   * @return OperationIndex
+   */
+  OperationIndex insertPermute(const OperandIndex &operand_index,
+                               const operand::PermuteFactor &factor);
+
+private:
+  // NOTE(review): empty access section - could be removed
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace neurun
+
+#endif // __NEURUN_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
diff --git a/runtime/neurun/core/src/ir/pass/PermutationOperationPass.cc b/runtime/neurun/core/src/ir/pass/PermutationOperationPass.cc
new file mode 100644
index 000000000..41a1ad903
--- /dev/null
+++ b/runtime/neurun/core/src/ir/pass/PermutationOperationPass.cc
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermutationOperationPass.h"
+
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+#include "ir/Graph.h"
+
+namespace neurun
+{
+namespace ir
+{
+namespace pass
+{
+
+// Dispatches each operation to the matching visit() overload below.
+void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
+{
+  node.accept(*this);
+}; // NOTE(review): stray ';' after function body - harmless but should be removed
+
+// For a layout-sensitive node whose op_seq is lowered with a backend layout
+// different from the frontend layout, split the node out into its own op_seq
+// that keeps the frontend layout, and fix up the permute factors of all its
+// operands. Three phases: (1) split the op_seq after the node, (2) move the
+// node into a fresh op_seq with the frontend layout, (3) rewrite the
+// use/def permute factors of the node's inputs and outputs.
+void PermutationOperationPass::changeToKeepLayout(const Operation &node)
+{
+  const auto &output_ind = node.getOutputs().at(0);
+  const auto &output_obj = _graph.operands().at(output_ind);
+
+  assert(output_obj.getDef().size() == 1);
+  const auto &node_index = output_obj.getDef().list().front();
+  const auto &subg_index = _graph.subgraphs().getOperation(node_index);
+
+  const auto frontend_layout = _graph.subgraphs().at(subg_index).getLayout();
+  const auto backend_layout = _graph.getLowerInfo(subg_index)->layout();
+
+  // Nothing to do when the lowered layout already matches the frontend one
+  if (frontend_layout == backend_layout)
+  {
+    return;
+  }
+
+  // CPU supports only NHWC now
+  if (_graph.getLowerInfo(subg_index)->backend()->config()->id() != "cpu")
+  {
+    // TODO Change backend of this node
+    assert(frontend_layout == Layout::NHWC || backend_layout == Layout::UNKNOWN);
+  }
+
+  // Divide op_seq based on target operation
+  {
+    auto &above_subg = _graph.subgraphs().at(subg_index);
+
+    // Create new op_seq and move information from existing op_seq to new op_seq if target
+    // node is the end of op_seq
+    auto it = above_subg.begin();
+    // Find iterator of target node in op_seq
+    while ((it++)->index != node_index)
+      ;
+    if (it != above_subg.end())
+    {
+      // Everything after the target node moves to a new "below" op_seq that
+      // keeps the original layout/backend.
+      const auto &below_subg_index =
+          _graph.subgraphs().emplace(it->index, *it->node, above_subg.getLayout());
+      auto &below_subg = _graph.subgraphs().at(below_subg_index);
+      below_subg.setInputs(it->node->getInputs());
+      below_subg.setOutputs(it->node->getOutputs());
+
+      std::vector<OperationIndex> remove_list;
+      remove_list.emplace_back(it->index);
+      while (++it != above_subg.end())
+      {
+        below_subg.appendOperation(it->index, *it->node);
+        below_subg.setOutputs(it->node->getOutputs());
+        remove_list.emplace_back(it->index);
+      }
+
+      above_subg.setOutputs(node.getOutputs());
+      for (const auto &index : remove_list)
+      {
+        above_subg.remove(index);
+      }
+
+      const auto subg_li = _graph.getLowerInfo(subg_index);
+      _graph.setLowerInfo(below_subg_index, nnfw::cpp14::make_unique<operation::LowerInfo>(
+                                                subg_li->backend(), subg_li->layout()));
+    }
+  }
+
+  // Remove target operation from op_seq and insert the target operation to new op_seq
+  {
+    const auto backend = _graph.getLowerInfo(subg_index)->backend();
+
+    // Remove target operation from subraph
+    _graph.subgraphs().removeFromSubgraph(node_index);
+
+    if (!_graph.subgraphs().exist(subg_index))
+    {
+      // Remove lowerinfo for op_seq of target operation if the op_seq does not exist
+      _graph.removeLowerInfo(subg_index);
+    }
+    else
+    {
+      // Update op_seq of target operation if the op_seq exists
+      auto &above_subg = _graph.subgraphs().at(subg_index);
+      const auto last_node = (--above_subg.end())->node;
+      above_subg.setOutputs(last_node->getOutputs());
+    }
+
+    // Create new op_seq and set information to the op_seq
+    // The node's own op_seq is lowered with the frontend layout - this is the
+    // whole point of the pass.
+    auto new_subg_index = _graph.subgraphs().emplace(node_index, node, frontend_layout);
+    auto &new_subg = _graph.subgraphs().at(new_subg_index);
+    new_subg.setInputs(node.getInputs());
+    new_subg.setOutputs(node.getOutputs());
+    _graph.setLowerInfo(new_subg_index,
+                        nnfw::cpp14::make_unique<operation::LowerInfo>(backend, frontend_layout));
+  }
+
+  // Change PermuteFactors of operands of target node
+  {
+    const auto &subg_index = _graph.subgraphs().getOperation(node_index);
+    const auto subg_li = _graph.getLowerInfo(subg_index);
+    const auto backend = subg_li->backend();
+    const operand::PermuteFactor removed_factor{backend, backend_layout};
+    const operand::PermuteFactor new_factor{backend, frontend_layout};
+    for (const auto &input : node.getInputs())
+    {
+      // Only drop the old use factor if no *other* consumer of this input
+      // still needs the (backend, backend_layout) combination.
+      bool canRemove = true;
+      for (const auto &use : _graph.operands().at(input).getUses().list())
+      {
+        if (use != node_index)
+        {
+          const auto &use_subg_index = _graph.subgraphs().getOperation(use);
+          auto use_subg_li = _graph.getLowerInfo(use_subg_index);
+          if (use_subg_li->backend() == backend && use_subg_li->layout() == backend_layout)
+          {
+            canRemove = false;
+            break;
+          }
+        }
+      }
+
+      auto lower_info = _graph.getLowerInfo(input);
+      if (canRemove)
+      {
+        lower_info->removeUsePermuteFactor(removed_factor);
+      }
+      lower_info->addUsePermuteFactor(new_factor);
+
+      // Whether if node's input is an input of model or a constant
+      if (_graph.operands().at(input).getDef().size() == 0)
+      {
+        assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant());
+        lower_info->removeDefPermuteFactor(removed_factor);
+        lower_info->addDefPermuteFactor(new_factor);
+      }
+    }
+
+    for (const auto &output : node.getOutputs())
+    {
+      auto lower_info = _graph.getLowerInfo(output);
+      lower_info->removeDefPermuteFactor(removed_factor);
+      lower_info->addDefPermuteFactor(new_factor);
+
+      // Whether if node's output is an output of model
+      if (_graph.operands().at(output).getUses().size() == 0)
+      {
+        assert(_graph.getOutputs().contains(output));
+        lower_info->removeUsePermuteFactor(removed_factor);
+        lower_info->addUsePermuteFactor(new_factor);
+      }
+    }
+  }
+}
+
+// FullyConnected flattens its input; a rank-4 input makes the result
+// layout-dependent, so keep the frontend layout in that case.
+void PermutationOperationPass::visit(const operation::FullyConnected &node)
+{
+  const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT);
+  const auto &input_obj = _graph.operands().at(input_ind);
+  const auto &input_shape = input_obj.shape();
+
+  if (input_shape.rank() == 4)
+  {
+    changeToKeepLayout(node);
+  }
+}
+
+// Gather indexes along an axis; with rank >= 4 on either side the axis
+// meaning depends on layout, so keep the frontend layout.
+void PermutationOperationPass::visit(const operation::Gather &node)
+{
+  const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT);
+  const auto &input_obj = _graph.operands().at(input_ind);
+  const auto &input_shape = input_obj.shape();
+
+  const auto &output_ind = node.getOutputs().at(0);
+  const auto &output_obj = _graph.operands().at(output_ind);
+  const auto &output_shape = output_obj.shape();
+
+  if (input_shape.rank() >= 4 || output_shape.rank() >= 4)
+  {
+    changeToKeepLayout(node);
+  }
+}
+
+// Reshape reinterprets the element order, which is layout-dependent for
+// rank >= 4 shapes; keep the frontend layout in that case.
+void PermutationOperationPass::visit(const operation::Reshape &node)
+{
+  const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+  const auto &input_obj = _graph.operands().at(input_ind);
+  const auto &input_shape = input_obj.shape();
+
+  const auto &output_ind = node.getOutputs().at(0);
+  const auto &output_obj = _graph.operands().at(output_ind);
+  const auto &output_shape = output_obj.shape();
+
+  if (input_shape.rank() >= 4 || output_shape.rank() >= 4)
+  {
+    changeToKeepLayout(node);
+  }
+}
+
+} // namespace pass
+} // namespace ir
+} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/PermutationOperationPass.h b/runtime/neurun/core/src/ir/pass/PermutationOperationPass.h
new file mode 100644
index 000000000..896e0176a
--- /dev/null
+++ b/runtime/neurun/core/src/ir/pass/PermutationOperationPass.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
+#define __NEURUN_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
+
+#include "ir/OperationVisitor.h"
+#include "OperationPass.h"
+
+namespace neurun
+{
+namespace ir
+{
+namespace pass
+{
+
+/**
+ * @brief Operation pass that forces layout-sensitive operations
+ *        (FullyConnected, Gather, Reshape - see the visit overloads) to keep
+ *        the frontend layout when their op_seq was lowered to a different
+ *        backend layout.
+ */
+class PermutationOperationPass : public OperationPass, public OperationVisitor
+{
+public:
+  using OperationPass::OperationPass;
+
+public:
+  std::string id() final { return "PermutationOperationPass"; }
+
+public:
+  void callback(const OperationIndex &i, Operation &n) final;
+
+public:
+  void visit(const operation::FullyConnected &) final;
+  void visit(const operation::Gather &) final;
+  void visit(const operation::Reshape &) final;
+
+private:
+  // Shared implementation used by all visit overloads above
+  void changeToKeepLayout(const Operation &);
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace neurun
+
+#endif // __NEURUN_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
diff --git a/runtime/neurun/core/src/ir/verifier/Verifier.cc b/runtime/neurun/core/src/ir/verifier/Verifier.cc
new file mode 100644
index 000000000..7bd8ac512
--- /dev/null
+++ b/runtime/neurun/core/src/ir/verifier/Verifier.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Verifier.h"
+
+#include "ir/Graph.h"
+#include "ir/OperationIndexMap.h"
+
+#include "util/logging.h"
+
+// NOTE(review): this file uses std::function and uint32_t but does not
+// include <functional>/<cstdint> directly - it relies on transitive includes.
+
+namespace neurun
+{
+namespace ir
+{
+namespace verifier
+{
+
+//
+// DAGChecker
+//
+
+// Returns true iff the operation graph is acyclic. Performs a recursive DFS
+// from every operation, following operand def->use edges, and reports a cycle
+// when an operation is re-entered while still on the DFS stack.
+// NOTE(review): recursion depth is bounded by the longest def-use chain;
+// extremely deep graphs could exhaust the stack.
+bool DAGChecker::verify(const Graph &graph) const
+{
+  auto &operations = graph.operations();
+  bool cyclic = false;
+
+  OperationIndexMap<bool> visited;
+  operations.iterate(
+      [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
+  OperationIndexMap<bool> on_stack = visited; // Copy from visited
+
+  std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive =
+      [&](const OperationIndex &index, const Operation &node) -> void {
+    if (on_stack[index])
+      cyclic = true;
+    if (visited[index])
+      return;
+    visited[index] = true;
+    on_stack[index] = true;
+
+    for (auto output : node.getOutputs())
+    {
+      const auto &operand = graph.operands().at(output);
+      for (const auto &use : operand.getUses().list())
+      {
+        dfs_recursive(use, graph.operations().at(use));
+      }
+    }
+
+    on_stack[index] = false;
+  };
+
+  operations.iterate(dfs_recursive);
+
+  return !cyclic;
+}
+
+//
+// EdgeConsistencyChecker
+//
+
+// Returns true iff every operation's input operands list it as a use and
+// every output operand lists it as a def - i.e. the use/def bookkeeping is
+// consistent with the operations' input/output lists.
+bool EdgeConsistencyChecker::verify(const Graph &graph) const
+{
+  auto &operations = graph.operations();
+  uint32_t mismatches = 0;
+  operations.iterate([&](const OperationIndex &index, const Operation &node) {
+    for (auto operand_index : node.getInputs())
+    {
+      auto &operand = graph.operands().at(operand_index);
+      mismatches += (operand.getUses().contains(index) ? 0 : 1);
+    }
+    for (auto operand_index : node.getOutputs())
+    {
+      auto &operand = graph.operands().at(operand_index);
+      mismatches += (operand.getDef().contains(index) ? 0 : 1);
+    }
+  });
+  return mismatches == 0;
+}
+
+} // namespace verifier
+} // namespace ir
+} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/verifier/Verifier.h b/runtime/neurun/core/src/ir/verifier/Verifier.h
new file mode 100644
index 000000000..0993a239e
--- /dev/null
+++ b/runtime/neurun/core/src/ir/verifier/Verifier.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_GRAPH_VERIFIER_VERIFIER_H__
+#define __NEURUN_GRAPH_VERIFIER_VERIFIER_H__
+
+// Forward declaration only - verifiers take the graph by const reference
+namespace neurun
+{
+namespace ir
+{
+class Graph;
+} // namespace ir
+} // namespace neurun
+
+namespace neurun
+{
+namespace ir
+{
+namespace verifier
+{
+
+// Common interface for read-only graph invariant checks
+struct IVerifier
+{
+  virtual ~IVerifier() = default;
+  virtual bool verify(const Graph &graph) const = 0;
+};
+
+} // namespace verifier
+} // namespace ir
+} // namespace neurun
+
+namespace neurun
+{
+namespace ir
+{
+namespace verifier
+{
+
+// Checks that the operation graph has no cycles
+class DAGChecker : public IVerifier
+{
+public:
+  bool verify(const Graph &graph) const override;
+};
+
+// Checks that operand use/def lists agree with operation input/output lists
+class EdgeConsistencyChecker : public IVerifier
+{
+public:
+  bool verify(const Graph &graph) const override;
+};
+
+} // namespace verifier
+} // namespace ir
+} // namespace neurun
+
+#endif // __NEURUN_GRAPH_VERIFIER_VERIFIER_H__
diff --git a/runtime/neurun/core/src/library_info.cc b/runtime/neurun/core/src/library_info.cc
new file mode 100644
index 000000000..601d09185
---
/dev/null
+++ b/runtime/neurun/core/src/library_info.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Identification string embedded into the built library (discoverable e.g.
+// via `strings`). `volatile` presumably prevents the otherwise-unreferenced
+// array from being optimized out - TODO confirm.
+volatile const char info[] = "library information : runtime=neurun";
diff --git a/runtime/neurun/core/src/util/ConfigSource.cc b/runtime/neurun/core/src/util/ConfigSource.cc
new file mode 100644
index 000000000..7d57ec178
--- /dev/null
+++ b/runtime/neurun/core/src/util/ConfigSource.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/ConfigSource.h"
+#include "util/GeneralConfigSource.h"
+#include "util/EnvConfigSource.h"
+
+#include <array>
+#include <algorithm>
+#include <cassert>
+
+#include "cpp14/memory.h"
+
+// NOTE(review): std::unordered_map is used below without a direct
+// #include <unordered_map> - relies on a transitive include.
+
+namespace neurun
+{
+namespace util
+{
+
+// Process-wide config source; lazily defaulted by config_source() below.
+static std::unique_ptr<IConfigSource> _source;
+
+// Setter: install a custom configuration source (takes ownership).
+void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+
+// Getter: returns the installed source, creating the build-time default
+// (env-var backed or in-memory map) on first use.
+static IConfigSource *config_source()
+{
+  if (!_source)
+  {
+#ifdef ENVVAR_FOR_DEFAULT_CONFIG
+    // Default ConfigSource is EnvConfigSource
+    _source = nnfw::cpp14::make_unique<EnvConfigSource>();
+#else
+    _source = nnfw::cpp14::make_unique<GeneralConfigSource>();
+#endif // ENVVAR_FOR_DEFAULT_CONFIG
+  }
+  return _source.get();
+}
+
+// Looks up `key` in the active source, falling back to the per-key default
+// declared in util/Config.lst when the source yields an empty string.
+static std::string getConfigOrDefault(const std::string &key)
+{
+  static std::unordered_map<std::string, std::string> defaults;
+  if (defaults.empty())
+  {
+// Populate the defaults table once from the X-macro list
+#define CONFIG(Name, Type, Default)               \
+  {                                               \
+    auto name = std::string{#Name};               \
+    defaults.emplace(name, std::string{Default}); \
+  }
+
+#include "util/Config.lst"
+
+#undef CONFIG
+  }
+
+  // Treat empty string and absence of the value to be the same
+  auto ret = config_source()->get(key);
+  if (ret.empty())
+  {
+    auto itr = defaults.find(key);
+    if (itr != defaults.end())
+    {
+      // Return the default value if exists
+      ret = itr->second;
+    }
+  }
+
+  return ret;
+}
+
+// Boolean view of a config value. Anything NOT in the (case-sensitive)
+// false list - including unrecognized strings such as "false" or "off" -
+// is treated as true.
+bool getConfigBool(const std::string &key)
+{
+  auto raw = getConfigOrDefault(key);
+  static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"};
+  auto false_found = std::find(false_list.begin(), false_list.end(), raw);
+
+  return (false_found == false_list.end());
+}
+
+// Integer view of a config value.
+// NOTE(review): std::stoi throws std::invalid_argument/std::out_of_range on
+// non-numeric or oversized values - callers see that exception directly.
+int getConfigInt(const std::string &key)
+{
+  auto raw = getConfigOrDefault(key);
+  return std::stoi(raw);
+}
+
+// Raw string view of a config value (with Config.lst default applied).
+std::string getConfigString(const std::string &key) { return getConfigOrDefault(key); }
+
+} // namespace util
+} // namespace neurun
+
+namespace neurun
+{
+namespace util
+{
+namespace config
+{
+
+// Define one `const char *Name = "Name";` key constant per Config.lst entry
+#define CONFIG(Name, Type, Default) const char *Name = #Name;
+
+#include "util/Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace neurun
diff --git a/runtime/neurun/core/src/util/EnvConfigSource.cc b/runtime/neurun/core/src/util/EnvConfigSource.cc
new file mode 100644
index 000000000..52a6bf2a4
--- /dev/null
+++ b/runtime/neurun/core/src/util/EnvConfigSource.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EnvConfigSource.h"
+
+#include <cstdlib>
+
+namespace neurun
+{
+namespace util
+{
+
+// Environment variables take precedence; fall back to the in-memory map of
+// the GeneralConfigSource base class when the variable is unset.
+std::string EnvConfigSource::get(const std::string &key) const
+{
+  const char *value = std::getenv(key.c_str());
+  if (value != nullptr)
+  {
+    return value;
+  }
+  else
+  {
+    return GeneralConfigSource::get(key);
+  }
+}
+
+} // namespace util
+} // namespace neurun
diff --git a/runtime/neurun/core/src/util/EventCollectorGlobal.cc b/runtime/neurun/core/src/util/EventCollectorGlobal.cc
new file mode 100644
index 000000000..6c3594f5f
--- /dev/null
+++ b/runtime/neurun/core/src/util/EventCollectorGlobal.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventCollectorGlobal.h"
+
+#include <cassert>
+#include <fstream>
+
+#include "util/ConfigSource.h"
+
+namespace neurun
+{
+namespace util
+{
+
+EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder}
+{
+  // DO NOTHING
+}
+
+// Flushes the recorded events to "<TRACE_FILEPATH>.global" at process exit
+// (this is the destructor of a function-local static singleton - see get()).
+EventCollectorGlobal::~EventCollectorGlobal()
+{
+  auto path = util::getConfigString(util::config::TRACE_FILEPATH);
+  if (!path.empty())
+  {
+    // TODO Need better way for saved file path than just appending ".global" to the trace file path
+    std::ofstream ofs{path + ".global"};
+    _recorder.writeToFile(ofs);
+  }
+}
+
+// Meyers singleton accessor
+EventCollectorGlobal &EventCollectorGlobal::get()
+{
+  static EventCollectorGlobal instance;
+  return instance;
+}
+
+// RAII duration event: BEGIN on construction, END on destruction
+EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag}
+{
+  auto &glob = EventCollectorGlobal::get();
+  glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
+}
+EventDurationBlock::~EventDurationBlock()
+{
+  auto &glob = EventCollectorGlobal::get();
+  glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
+}
+
+// Manual duration event: caller must invoke begin()/end() as a pair;
+// _pair tracks that the pair was completed (checked in the destructor).
+EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {}
+
+EventDurationManual::~EventDurationManual()
+{
+  // Check if it has called begin-end pair
+  assert(_pair);
+}
+
+void EventDurationManual::begin()
+{
+  _pair = false;
+  auto &glob = EventCollectorGlobal::get();
+  glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
+}
+
+void EventDurationManual::end()
+{
+  assert(!_pair);
+  _pair = true;
+  auto &glob = EventCollectorGlobal::get();
+  glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
+}
+
+} // namespace util
+} // namespace neurun
diff --git a/runtime/neurun/core/src/util/GeneralConfigSource.cc b/runtime/neurun/core/src/util/GeneralConfigSource.cc
new file mode 100644
index 000000000..084e4c109
--- /dev/null
+++ b/runtime/neurun/core/src/util/GeneralConfigSource.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/GeneralConfigSource.h"
+#include "util/logging.h"
+
+namespace neurun
+{
+namespace util
+{
+
+// Returns the stored value for `key`, or an empty string when absent
+// (callers treat empty and missing identically - see ConfigSource.cc).
+std::string GeneralConfigSource::get(const std::string &key) const
+{
+  auto itr = _map.find(key);
+  if (itr == _map.end())
+  {
+    return "";
+  }
+  else
+  {
+    return itr->second;
+  }
+}
+
+// Stores (and logs) a key/value pair, overwriting any previous value.
+void GeneralConfigSource::set(const std::string &key, const std::string &val)
+{
+  VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl;
+  _map[key] = val;
+}
+
+} // namespace util
+} // namespace neurun
diff --git a/runtime/neurun/core/src/util/Padding.cc b/runtime/neurun/core/src/util/Padding.cc
new file mode 100644
index 000000000..2e2202b58
--- /dev/null
+++ b/runtime/neurun/core/src/util/Padding.cc
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Padding.h"
+#include "util/Utils.h"
+
+#include <algorithm>
+#include <stdexcept>
+
+namespace neurun
+{
+namespace util
+{
+
+// All-zero padding, matching NNAPI's VALID padding mode.
+ir::ExplicitPadding validPadding(void)
+{
+  //
+  // ANEURALNETWORKS_PADDING_VALID
+  //
+  // VALID padding. No padding.
+  //
+  // When the input size is not evenly divisible by the filter size,
+  // the input at the end that could not fill the whole filter tile
+  // will simply be ignored.
+  //
+  ir::ExplicitPadding padding;
+
+  padding.top = 0;
+  padding.bottom = 0;
+  padding.left = 0;
+  padding.right = 0;
+
+  return padding;
+}
+
+// SAME padding computed from the input feature map shape, kernel size and
+// stride, per the NNAPI specification quoted below.
+ir::ExplicitPadding samePaddingUsingIFM(const ir::FeatureShape &ifm_shape, const ir::Stride &stride,
+                                        uint32_t kw, uint32_t kh)
+{
+  ir::ExplicitPadding padding;
+
+  // ANEURALNETWORKS_PADDING_SAME (from NNAPI spec)
+  //
+  // SAME padding. Padding on both ends are the "same":
+  //
+  //   padding_to_beginning = total_padding / 2
+  //   padding_to_end = (total_padding + 1)/2.
+  //
+  const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
+  const int32_t horizontal_expected_output =
+      (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+
+  const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh;
+  const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
+
+  const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw;
+  const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
+
+  padding.top = vertical_total_padding / 2;
+  padding.bottom = (vertical_total_padding + 1) / 2;
+  padding.left = horizontal_total_padding / 2;
+  padding.right = (horizontal_total_padding + 1) / 2;
+
+  return padding;
+}
+
+// SAME padding with a debug-build sanity check that the given output shape
+// matches the SAME-mode expected output (ofm_shape is otherwise unused).
+ir::ExplicitPadding samePadding(const ir::FeatureShape &ifm_shape,
+                                const ir::FeatureShape &ofm_shape, const ir::Stride &stride,
+                                uint32_t kw, uint32_t kh)
+{
+  const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
+  const int32_t horizontal_expected_output =
+      (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+  assert(vertical_expected_output == ofm_shape.H);
+  assert(horizontal_expected_output == ofm_shape.W);
+
+  UNUSED_RELEASE(ofm_shape);
+  UNUSED_RELEASE(vertical_expected_output);
+  UNUSED_RELEASE(horizontal_expected_output);
+
+  return samePaddingUsingIFM(ifm_shape, stride, kw, kh);
+}
+
+// Resolves an ir::Padding of any type (EXPLICIT/SAME/VALID) to explicit
+// per-edge pixel counts; throws on an unrecognized padding type.
+ir::ExplicitPadding calculatePadding(const ir::Padding &padding, const ir::FeatureShape &ifm_shape,
+                                     const ir::FeatureShape &ofm_shape, const ir::Stride &stride,
+                                     uint32_t kw, uint32_t kh)
+{
+  if (padding.type == ir::PaddingType::EXPLICIT)
+  {
+    return padding.param;
+  }
+  else if (padding.type == ir::PaddingType::SAME)
+  {
+    return samePadding(ifm_shape, ofm_shape, stride, kw, kh);
+  }
+  else if (padding.type == ir::PaddingType::VALID)
+  {
+    return validPadding();
+  }
+  else
+  {
+    throw std::runtime_error{"Cannot handle padding type"};
+  }
}
+
+} // namespace util
+} // namespace neurun
diff --git a/runtime/neurun/core/src/util/ShapeInference.cc b/runtime/neurun/core/src/util/ShapeInference.cc
new file mode 100644
index 000000000..6fa29e7d8
--- /dev/null
+++ b/runtime/neurun/core/src/util/ShapeInference.cc
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Utils.h"
+#include "ir/InternalType.h"
+#include "ir/Shape.h"
+#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/MaxPool2D.h"
+#include "util/ShapeInference.h"
+
+namespace neurun
+{
+namespace shape_inference
+{
+
+//
+// Helper functions
+//
+
+namespace
+{
+
+// Integer ceiling division; restricted to positive operands (see assert).
+template <typename T, typename U>
+typename std::enable_if<std::is_integral<T>::value && std::is_integral<U>::value,
+                        typename std::common_type<T, U>::type>::type
+ceil_div(T dividend, U divisor)
+{
+  assert(dividend > 0 && divisor > 0 && "this implementations is for positive numbers only");
+  return (dividend + divisor - 1) / divisor;
+}
+
+// Calculate the result of broadcast of two shapes
+// (numpy-style: dimensions are matched from the right; a dimension of 1
+// broadcasts against any size, otherwise sizes must be equal).
+ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape)
+{
+  ir::Shape out_shape;
+  auto max_rank = std::max(lhs_shape.rank(), rhs_shape.rank());
+
+  for (int idx = 0; idx < max_rank; ++idx)
+  {
+    // Go over operands dimensions from right to left
+    int lhs_idx = lhs_shape.rank() - idx - 1;
+    int rhs_idx =
rhs_shape.rank() - idx - 1; + + int32_t lhs_dim = lhs_idx >= 0 ? lhs_shape.dim(lhs_idx) : 1; + int32_t rhs_dim = rhs_idx >= 0 ? rhs_shape.dim(rhs_idx) : 1; + + if (lhs_dim != 1 && rhs_dim != 1 && lhs_dim != rhs_dim) + throw std::runtime_error("Incompatible shapes for broadcast"); + + out_shape.prepend(std::max(lhs_dim, rhs_dim)); + } + + return out_shape; +} + +// Calculate output height and width of convolution-like operation +std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h, + const int ker_w, const ir::Padding pad, + const ir::Stride stride) +{ + int32_t out_h = 0, out_w = 0; + + switch (pad.type) + { + case ir::PaddingType::SAME: + out_h = ceil_div(in_h, stride.vertical); + out_w = ceil_div(in_w, stride.horizontal); + break; + case ir::PaddingType::VALID: + out_h = ceil_div(in_h - ker_h + 1, stride.vertical); + out_w = ceil_div(in_w - ker_w + 1, stride.horizontal); + break; + case ir::PaddingType::EXPLICIT: + out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1; + out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1; + break; + default: + assert(false); + } + + return {out_h, out_w}; +} + +} // namespace + +// +// Shape inference +// + +Shapes inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape) +{ + return {broadcastShapes(lhs_shape, rhs_shape)}; +} + +Shapes inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param ¶m, + const ir::Layout layout) +{ + assert(layout == ir::Layout::NHWC); + auto ifm_shape = in_shape.asFeature(layout); + const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, + param.padding, param.stride); + // Pooling don't change number of channels and batch size + return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}}; +} + +Shapes inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat::Param ¶m) +{ + const int32_t 
concat_axis = param.axis; + const auto &first_in_shape = in_shapes[0]; + + // Check that all shapes are equal except for concat axis dimension + for (const auto &in_shape : in_shapes) + { + assert(in_shape.rank() == first_in_shape.rank()); + for (int64_t dim_idx = 0; dim_idx < in_shape.rank(); ++dim_idx) + assert(dim_idx == concat_axis || in_shape.dim(dim_idx) == first_in_shape.dim(dim_idx)); + } + + // Calculate output shape + ir::Shape out_shape(first_in_shape); + out_shape.dim(concat_axis) = 0; + for (const auto &in_shape : in_shapes) + out_shape.dim(concat_axis) += in_shape.dim(concat_axis); + return {out_shape}; +} + +Shapes inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param ¶m, + const ir::Layout layout) +{ + assert(layout == ir::Layout::NHWC); + auto ifm_shape = in_shape.asFeature(layout); + const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, + param.padding, param.stride); + // Pooling don't change number of channels and batch size + return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}}; +} + +Shapes inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape, + const ir::operation::Conv2D::Param ¶m, ir::Layout layout) +{ + assert(layout == ir::Layout::NHWC); + auto ifm_shape = in_shape.asFeature(layout); + + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] + auto kf_shape = ker_shape.asFeature(layout); + assert(ifm_shape.C == kf_shape.C); + + const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W, + param.padding, param.stride); + + return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N}}; +} + +Shapes inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape, + const ir::operation::DepthwiseConv2D::Param ¶m, + ir::Layout layout) +{ + assert(layout == ir::Layout::NHWC); + auto ifm_shape = in_shape.asFeature(layout); + + // Kernel format is [1, 
kernel_height, kernel_width, depth_out] + auto kf_shape = ker_shape.asFeature(layout); + assert(kf_shape.C == static_cast<int32_t>(ifm_shape.C * param.multiplier)); + assert(kf_shape.N == 1); + + const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W, + param.padding, param.stride); + + return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.C}}; +} + +Shapes inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape) +{ + assert(in_shape.rank() >= 2); + assert(ker_shape.rank() == 2); + + const auto input_size_with_batch = in_shape.num_elements(); + const auto num_units = ker_shape.dim(0); + const auto input_size = ker_shape.dim(1); + const auto batch_size = input_size_with_batch / input_size; + assert(input_size_with_batch % input_size == 0); + + return {{ir::Shape({static_cast<int32_t>(batch_size), num_units})}}; +} + +} // namespace shape_inference +} // namespace neurun diff --git a/runtime/neurun/core/src/util/Utils.cc b/runtime/neurun/core/src/util/Utils.cc new file mode 100644 index 000000000..1e24e28d4 --- /dev/null +++ b/runtime/neurun/core/src/util/Utils.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util/Utils.h" + +#include <cassert> + +namespace neurun +{ +namespace util +{ + +const char *to_string(const ir::PaddingType type) +{ + assert((type == ir::PaddingType::EXPLICIT) || (type == ir::PaddingType::SAME) || + (type == ir::PaddingType::VALID)); + + switch (type) + { + case ir::PaddingType::EXPLICIT: + return "Padding::EXPLICIT"; + case ir::PaddingType::SAME: + return "Padding::SAME"; + case ir::PaddingType::VALID: + return "Padding::VALID"; + } + + return nullptr; +} + +Coordinates convertCoordinates(const Coordinates &from_coordinates, ir::Layout from_layout, + ir::Layout to_layout) +{ + assert(from_coordinates.size() == 4); + Coordinates to{from_coordinates}; + if (from_layout == ir::Layout::NHWC && to_layout == ir::Layout::NCHW) + { + to.set(0, from_coordinates[0]); + to.set(1, from_coordinates[3]); + to.set(2, from_coordinates[1]); + to.set(3, from_coordinates[2]); + } + else if (from_layout == ir::Layout::NCHW && to_layout == ir::Layout::NHWC) + { + to.set(0, from_coordinates[0]); + to.set(1, from_coordinates[2]); + to.set(2, from_coordinates[3]); + to.set(3, from_coordinates[1]); + } + + return to; +} + +} // namespace util +} // namespace neurun diff --git a/runtime/neurun/core/src/util/logging.cc b/runtime/neurun/core/src/util/logging.cc new file mode 100644 index 000000000..c23e2b53c --- /dev/null +++ b/runtime/neurun/core/src/util/logging.cc @@ -0,0 +1,7 @@ +#include "util/logging.h" + +neurun::util::logging::Context &neurun::util::logging::Context::get() noexcept +{ + static Context ctx; + return ctx; +} diff --git a/runtime/neurun/frontend/CMakeLists.txt b/runtime/neurun/frontend/CMakeLists.txt new file mode 100644 index 000000000..5ea6cdadd --- /dev/null +++ b/runtime/neurun/frontend/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectories() diff --git a/runtime/neurun/frontend/base_loader/CMakeLists.txt b/runtime/neurun/frontend/base_loader/CMakeLists.txt new file mode 100644 index 000000000..358fc2646 --- /dev/null +++ 
b/runtime/neurun/frontend/base_loader/CMakeLists.txt
@@ -0,0 +1,7 @@
# base_loader is only needed when at least one flatbuffer frontend
# (TFLite or Circle) is being built.
if(NOT BUILD_TFLITE_LOADER AND NOT BUILD_CIRCLE_LOADER)
  return()
endif(NOT BUILD_TFLITE_LOADER AND NOT BUILD_CIRCLE_LOADER)

# Header-only library: consumers inherit the include path and dependencies.
add_library(base_loader INTERFACE)
target_include_directories(base_loader INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(base_loader INTERFACE neurun_core nnfw_lib_cpp14)
diff --git a/runtime/neurun/frontend/base_loader/include/base_loader.h b/runtime/neurun/frontend/base_loader/include/base_loader.h
new file mode 100644
index 000000000..ae1562f6c
--- /dev/null
+++ b/runtime/neurun/frontend/base_loader/include/base_loader.h
@@ -0,0 +1,1278 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#ifndef __BASE_LOADER_BASE_LOADER_H__ +#define __BASE_LOADER_BASE_LOADER_H__ + +#include "ir/Graph.h" +#include "ir/Operations.Include.h" + +#include <map> +#include <cpp14/memory.h> +#include <fstream> +#include <limits> + +namespace neurun +{ +namespace base_loader +{ + +template <typename LoaderDomain, typename SpecificLoader> class BaseLoader +{ + using Verifier = typename LoaderDomain::Verifier; + using ActivationFunctionType = typename LoaderDomain::ActivationFunctionType; + using Buffer = typename LoaderDomain::Buffer; + using BuiltinOperator = typename LoaderDomain::BuiltinOperator; + using CustomOptionsFormat = typename LoaderDomain::CustomOptionsFormat; + using Model = typename LoaderDomain::Model; + using Operator = typename LoaderDomain::Operator; + using Padding = typename LoaderDomain::Padding; + using Pool2DOptions = typename LoaderDomain::Pool2DOptions; + using SubGraph = typename LoaderDomain::SubGraph; + using Tensor = typename LoaderDomain::Tensor; + using TensorType = typename LoaderDomain::TensorType; + +public: + /** + * @brief Construct a new Loader object + * + * @param graph reference on graph + */ + explicit BaseLoader(ir::Graph &graph) : _graph(graph), _model{nullptr} {} + + /** + * @brief Load a model from file + * + * @param file_path + */ + void loadFromFile(const char *file_path); + +protected: + ~BaseLoader() = default; + + void loadModel(); + + // Helper functions + ir::Activation convertActivation(ActivationFunctionType type); + ir::DataType tensorTypeToDataType(TensorType type); + + // Create operands form tflite::Tensor + ir::OperandIndex loadOperand(const Tensor *tensor); + void loadOperationIO(const Operator *op, ir::OperandIndexSequence &inputs, + ir::OperandIndexSequence &outputs); + // Create operations from Operator + void loadOperation(const Operator *op); + // Load Strides and Paddings from options to param + template <typename Param, typename OptionsType> + void loadStridesAndPaddings(Param ¶m, const OptionsType 
*options); + // Load Pool2D param + template <typename Param> void loadPool2D(Param ¶m, const Pool2DOptions *options); + + // Operations + void loadConv2D(const Operator *op); + void loadDepthwiseConv2D(const Operator *op); + void loadTransposeConv(const Operator *op); + void loadAvgPool2D(const Operator *op); + void loadReshape(const Operator *op); + void loadSoftmax(const Operator *op); + void loadMaxPool2D(const Operator *op); + void loadConcatenation(const Operator *op); + void loadInstanceNorm(const Operator *op); + void loadFC(const Operator *op); + void loadAdd(const Operator *op); + void loadSub(const Operator *op); + void loadMul(const Operator *op); + void loadDiv(const Operator *op); + void loadPack(const Operator *op); + void loadRelu(const Operator *op); + void loadRelu6(const Operator *op); + void loadResizeBilinear(const Operator *op); + void loadRsqrt(const Operator *op); + void loadSqrt(const Operator *op); + void loadSquaredDifference(const Operator *op); + void loadTanh(const Operator *op); + void loadTranspose(const Operator *op); + void loadMean(const Operator *op); + void loadReduceMax(const Operator *op); + void loadPad(const Operator *op); + void loadLogistic(const Operator *op); + void loadExp(const Operator *op); + void loadGather(const Operator *op); + void loadCustom(const Operator *op); + void loadSpaceToBatchND(const Operator *op); + void loadBatchToSpaceND(const Operator *op); + void loadReduceSum(const Operator *op); + void loadSqueeze(const Operator *op); + void loadPrelu(const Operator *op); + void loadSplit(const Operator *op); + void loadSlice(const Operator *op); + void loadStridedSlice(const Operator *op); + void loadUnpack(const Operator *op); + void loadMinimum(const Operator *op); + void loadMaximum(const Operator *op); + void loadCast(const Operator *op); + void loadComparison(const Operator *op); + void loadOneHot(const Operator *op); + +protected: + // Buffer for loading (if needed) + std::vector<char> _buffer; + // 
Reference on loadable Graph + ir::Graph &_graph; + const Model *_model; + // Maps Tensor indices to neurun Operands. + std::vector<ir::OperandIndex> _tensor_to_operand; + // Verifier + std::unique_ptr<Verifier> _verifier; +}; + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromFile(const char *file_path) +{ + std::ifstream stream(file_path, std::fstream::in | std::fstream::binary); + + if (!stream) + { + std::string msg = "Failed to open file `"; + msg += file_path; + msg += "`"; + throw std::runtime_error{msg}; + } + + stream.seekg(0, stream.end); + auto size = stream.tellg(); + stream.seekg(0, stream.beg); + + _buffer.resize(size); + stream.read(_buffer.data(), size); + + stream.close(); + + // Prepare verifier + _verifier = nnfw::cpp14::make_unique<Verifier>( + reinterpret_cast<const std::uint8_t *>(_buffer.data()), _buffer.size()); + + loadModel(); +} + +template <typename LoaderDomain, typename SpecificLoader> +ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::convertActivation( + const ActivationFunctionType type) +{ + switch (type) + { + case ActivationFunctionType::ActivationFunctionType_NONE: + return ir::Activation::NONE; + case ActivationFunctionType::ActivationFunctionType_RELU: + return ir::Activation::RELU; + case ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1: + return ir::Activation::RELU1; + case ActivationFunctionType::ActivationFunctionType_RELU6: + return ir::Activation::RELU6; + case ActivationFunctionType::ActivationFunctionType_TANH: + return ir::Activation::TANH; + default: + throw std::runtime_error(std::string("Unsupported activation type: ") + .append(EnumNameActivationFunctionType(type))); + } +} + +template <typename LoaderDomain, typename SpecificLoader> +ir::DataType +BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::tensorTypeToDataType(const TensorType type) +{ + switch (type) + { + case TensorType::TensorType_FLOAT32: + 
return ir::DataType::FLOAT32; + case TensorType::TensorType_INT32: + return ir::DataType::INT32; + case TensorType::TensorType_BOOL: + return ir::DataType::BOOL8; + case TensorType::TensorType_UINT8: + return ir::DataType::UINT8; + default: + throw std::runtime_error( + std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); + } +} + +template <typename LoaderDomain, typename SpecificLoader> +ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Tensor *tensor) +{ + ir::Shape shape; + // Shape + const auto *tensor_shape = tensor->shape(); + for (const auto &dim : *tensor_shape) + { + shape.append(dim); + } + // Type + ir::DataType data_type = tensorTypeToDataType(tensor->type()); + // Quantization + auto q_params = tensor->quantization(); + float scale = 0.0; + long zero_point = 0; + if (q_params != nullptr) + { + if (q_params->scale()) + { + if (q_params->scale()->size() != 1) + { + throw std::runtime_error("Only 1 scale for a tensor is supported."); + } + scale = q_params->scale()->Get(0); + } + + if (q_params->zero_point()) + { + if (q_params->zero_point()->size() != 1) + { + throw std::runtime_error("Only 1 zero_point value for a tensor is supported."); + } + zero_point = q_params->zero_point()->Get(0); + // zero_point is long while TypeInfo.zero_point is defined as int32_t. + assert(zero_point >= std::numeric_limits<int32_t>::min()); + assert(zero_point <= std::numeric_limits<int32_t>::max()); + } + auto details = q_params->details_as_CustomQuantization(); + if (details != nullptr) + throw std::runtime_error("Custom Quantization is not supported"); + + if (q_params->scale() && q_params->zero_point()) + { + data_type = ir::DataType::QUANT8_ASYMM; + } + } + // Create TypeInfo + ir::TypeInfo type_info(data_type, scale, zero_point); + // Create operand + const auto operand_index = _graph.addOperand(shape, type_info); + + // Constant tensors are indicated by non-empty data. 
+ const auto *data = _model->buffers()->Get(tensor->buffer())->data(); + if (data != nullptr) + { + auto ptr = nnfw::cpp14::make_unique<ir::CachedData>(data->data(), data->size()); + _graph.setOperandValue(operand_index, std::move(ptr)); + } + + // Name unused + // auto name = tensor->name(); + // Variablie + if (tensor->is_variable()) + throw std::runtime_error("Variable tensor not supported!"); + + return operand_index; +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadOperationIO(const Operator *op, + ir::OperandIndexSequence &inputs, + ir::OperandIndexSequence &outputs) +{ + for (const std::int32_t idx : *op->inputs()) + { + inputs.append(_tensor_to_operand[idx]); + } + + for (const std::int32_t idx : *op->outputs()) + { + outputs.append(_tensor_to_operand[idx]); + } +} + +template <typename LoaderDomain, typename SpecificLoader> +template <typename Param, typename OptionsType> +void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param ¶m, + const OptionsType *options) +{ + // Strides + param.stride.vertical = options->stride_w(); + param.stride.horizontal = options->stride_h(); + // Paddings + if (options->padding() == Padding::Padding_SAME) + param.padding.type = ir::PaddingType::SAME; + if (options->padding() == Padding::Padding_VALID) + param.padding.type = ir::PaddingType::VALID; + // param paddings indexes unused +} + +template <typename LoaderDomain, typename SpecificLoader> +template <typename Param> +void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param ¶m, + const Pool2DOptions *options) +{ + // Strides and Paddings + loadStridesAndPaddings(param, options); + // Filter width and height + // Strides + param.kw = options->filter_width(); + param.kh = options->filter_height(); + // Activation + param.activation = convertActivation(options->fused_activation_function()); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, 
SpecificLoader>::loadConv2D(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Conv2D::Param param; + const auto *options = op->builtin_options_as_Conv2DOptions(); + param.activation = convertActivation(options->fused_activation_function()); + loadStridesAndPaddings(param, options); + // Dilation h/w factor unused + std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadDepthwiseConv2D(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::DepthwiseConv2D::Param param; + const auto *options = op->builtin_options_as_DepthwiseConv2DOptions(); + param.activation = convertActivation(options->fused_activation_function()); + loadStridesAndPaddings(param, options); + // Multiplier + param.multiplier = options->depth_multiplier(); + // Dilation h/w factor unused + std::unique_ptr<ir::Operation> new_op(new ir::operation::DepthwiseConv2D(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::TransposeConv::Param param; + const auto *options = op->builtin_options_as_TransposeConvOptions(); + loadStridesAndPaddings(param, options); + std::unique_ptr<ir::Operation> new_op(new ir::operation::TransposeConv(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, 
SpecificLoader>::loadAvgPool2D(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::AvgPool2D::Param param; + const auto *options = op->builtin_options_as_Pool2DOptions(); + + loadPool2D(param, options); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadReshape(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + // const auto *options = op->builtin_options_as_ReshapeOptions(); + // No params + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Reshape(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Softmax::Param param; + const auto *options = op->builtin_options_as_SoftmaxOptions(); + // Beta + param.beta = options->beta(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Softmax(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::MaxPool2D::Param param; + const auto *options = op->builtin_options_as_Pool2DOptions(); + + loadPool2D(param, options); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param)); + 
_graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Concat::Param param; + const auto *options = op->builtin_options_as_ConcatenationOptions(); + // Axis + param.axis = options->axis(); + param.rank = _graph.operands().at(outputs.at(0)).shape().rank(); + // activation unused + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Concat(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadInstanceNorm(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::InstanceNorm::Param param; + const auto *options = op->builtin_options_as_InstanceNormOptions(); + + param.activation = convertActivation(options->fused_activation_function()); + // Use default value 1e-5 if value of epsilon is zero + param.epsilon = options->epsilon() == 0.f ? 
1e-5 : options->epsilon(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::InstanceNorm(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + const auto &input_operand = _graph.operands().at(inputs.at(ir::operation::FullyConnected::INPUT)); + auto &weights_operand = _graph.operands().at(inputs.at(ir::operation::FullyConnected::WEIGHT)); + if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 && + weights_operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM) + { + weights_operand.type(ir::DataType::QUANT8_SYMM); + } + + ir::operation::FullyConnected::Param param; + const auto *options = op->builtin_options_as_FullyConnectedOptions(); + + param.activation = convertActivation(options->fused_activation_function()); + // weights_format unused + + std::unique_ptr<ir::Operation> new_op(new ir::operation::FullyConnected(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Add::Param param; + const auto *options = op->builtin_options_as_AddOptions(); + + param.activation = convertActivation(options->fused_activation_function()); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, 
inputs, outputs); + + ir::operation::Sub::Param param; + const auto *options = op->builtin_options_as_SubOptions(); + + param.activation = convertActivation(options->fused_activation_function()); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Mul::Param param; + const auto *options = op->builtin_options_as_MulOptions(); + + param.activation = convertActivation(options->fused_activation_function()); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Div::Param param; + const auto *options = op->builtin_options_as_DivOptions(); + + param.activation = convertActivation(options->fused_activation_function()); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op) +{ + // This runtime_error will be removed if the one of backend supports this operation + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Pack::Param param; + const auto *options = op->builtin_options_as_PackOptions(); + param.num = options->values_count(); + param.axis = options->axis(); + param.rank = 
_graph.operands().at(outputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Pack(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + auto input = inputs.at(0); + auto size = inputs.at(1); + + // FIXME Handle ResizeBilinearOptions. 
+ if (!_graph.operands().at(size).isConstant()) + throw std::runtime_error("ResizeBilinear: non-constant 'size' is not supported."); + + std::vector<std::int32_t> size_v = _graph.operands().at(size).template asVector<std::int32_t>(); + + ir::operation::ResizeBilinear::Param param; + param.height_out = size_v[0]; + param.width_out = size_v[1]; + + std::unique_ptr<ir::Operation> new_op(new ir::operation::ResizeBilinear({input}, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::SquaredDifference(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new 
ir::operation::Tanh(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + auto input = inputs.at(0); + auto perm = inputs.at(1); + + if (!_graph.operands().at(perm).isConstant()) + throw std::runtime_error("Transpose: non-constant 'perm' is not supported."); + + ir::operation::Transpose::Param param; + param.perm = _graph.operands().at(perm).template asVector<int>(); + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Transpose({input}, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadMean(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + auto input = inputs.at(0); + auto axes = inputs.at(1); + + if (!_graph.operands().at(axes).isConstant()) + throw std::runtime_error("Mean: non-constant 'axes' is not supported."); + + ir::operation::Mean::Param param; + param.axes = _graph.operands().at(axes).template asVector<int>(); + param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims(); + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Mean({input}, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceMax(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + auto input = inputs.at(0); + auto axes = inputs.at(1); + + // FIXME 
Handle ReducerOptions. + if (!_graph.operands().at(axes).isConstant()) + throw std::runtime_error("ReduceSum: non-constant 'axes' is not supported."); + + ir::operation::ReduceMax::Param param; + param.axes = _graph.operands().at(axes).template asVector<int>(); + param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims(); + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::ReduceMax({input}, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Pad::Param param; + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Pad(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadGather(const Operator *op) +{ + ir::OperandIndexSequence inputs; + 
ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + ir::operation::Gather::Param param; + param.axis = op->builtin_options_as_GatherOptions()->axis(); + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Gather(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToBatchND(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op{new ir::operation::SpaceToBatchND{inputs, outputs}}; + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchToSpaceND(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + auto input = inputs.at(0); + auto block_shape = inputs.at(1); + auto crops = inputs.at(2); + + if (!_graph.operands().at(crops).isConstant()) + throw std::runtime_error("BatchToSpaceND: non-constant 'crops' is not supported."); + + std::vector<std::int32_t> crops_v = _graph.operands().at(crops).template asVector<std::int32_t>(); + assert(crops_v.size() == 4); + if (crops_v != std::vector<std::int32_t>{0, 0, 0, 0}) + throw std::runtime_error("BatchToSpaceND: 'crops' other than {0, 0, 0, 0} is not supported."); + + std::unique_ptr<ir::Operation> new_op{ + new ir::operation::BatchToSpaceND{{input, block_shape}, outputs}}; + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceSum(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + auto input = 
inputs.at(0); + auto axes = inputs.at(1); + + // FIXME Handle ReducerOptions. + if (!_graph.operands().at(axes).isConstant()) + throw std::runtime_error("ReduceSum: non-constant 'axes' is not supported."); + + ir::operation::ReduceSum::Param param; + param.axes = _graph.operands().at(axes).template asVector<int>(); + param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims(); + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op{new ir::operation::ReduceSum{{input}, outputs, param}}; + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + auto *op_code = _model->operator_codes()->Get(op->opcode_index()); + auto custom_op_id = op_code->custom_code()->str(); + + auto constraint = ir::OperandConstraint::createExact(inputs.size()); + + assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS && + "Unsupported custom operation options format"); + + size_t custom_op_data_size = op->custom_options()->size(); + auto custom_op_data = new char[custom_op_data_size]; + std::copy(op->custom_options()->begin(), op->custom_options()->end(), custom_op_data); + + ir::operation::Custom::Userdata userdata{}; + userdata.data = custom_op_data; + userdata.size = custom_op_data_size; + + auto new_op = nnfw::cpp14::make_unique<ir::operation::Custom>(constraint, inputs, outputs, + custom_op_id, userdata); + + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadSqueeze(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Squeeze::Param param{}; + 
const auto *options = op->builtin_options_as_SqueezeOptions(); + const auto *dims = options->squeeze_dims(); + if (dims) + { + if (dims->Length() > sizeof(param.dims) / sizeof(param.dims[0])) + throw std::runtime_error("Squeeze: 'param.ndims' is out of range."); + param.ndim = dims->Length(); + for (int i = 0; i < param.ndim; ++i) + param.dims[i] = dims->Get(i); + } + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Squeeze(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadPrelu(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::PReLU(inputs, outputs)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadSplit(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + // Notice : input order is strange for tflite split + auto input = inputs.at(1); + auto axis = inputs.at(0); + + // FIXME Handle SplitOptions. 
+ if (!_graph.operands().at(axis).isConstant()) + throw std::runtime_error("Split: non-constant 'axis' is not supported."); + + ir::operation::Split::Param param{}; + param.axis = _graph.operands().at(axis).template asScalar<int>(); + const auto *options = op->builtin_options_as_SplitOptions(); + param.num_splits = options->num_splits(); + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Split({input}, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadSlice(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::Slice::Param param; + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op{new ir::operation::Slice{inputs, outputs, param}}; + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadStridedSlice(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::StridedSlice::Param param; + + const auto *options = op->builtin_options_as_StridedSliceOptions(); + param.begin_mask = options->begin_mask(); + param.end_mask = options->end_mask(); + param.shrink_axis_mask = options->shrink_axis_mask(); + param.rank = _graph.operands().at(inputs.at(0)).shape().rank(); + + std::unique_ptr<ir::Operation> new_op{new ir::operation::StridedSlice{inputs, outputs, param}}; + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + 
loadOperationIO(op, inputs, outputs);
+
+  // num/axis come straight from UnpackOptions; rank is taken from input 0.
+  ir::operation::Unpack::Param param;
+  const auto *options = op->builtin_options_as_UnpackOptions();
+  param.num = options->num();
+  param.axis = options->axis();
+  param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Unpack(inputs, outputs, param));
+  _graph.addOperation(std::move(new_op));
+}
+
+// Load a MINIMUM operator; lowered to the IR 'Min' operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMinimum(const Operator *op)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs));
+  _graph.addOperation(std::move(new_op));
+}
+
+// Load a MAXIMUM operator; lowered to the IR 'Max' operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs));
+  _graph.addOperation(std::move(new_op));
+}
+
+// Load a CAST operator; no builtin options are read here.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs));
+  _graph.addOperation(std::move(new_op));
+}
+
+// Shared loader for the comparison builtins (EQUAL, GREATER[_EQUAL],
+// LESS[_EQUAL]); the concrete comparison type is derived from the opcode.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Comparison::Param param;
+
+  const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+
+  switch (builtin_op)
+  {
+    case
BuiltinOperator::BuiltinOperator_EQUAL: + param.comparison_type = ir::operation::Comparison::ComparisonType::Equal; + break; + case BuiltinOperator::BuiltinOperator_GREATER_EQUAL: + param.comparison_type = ir::operation::Comparison::ComparisonType::GreaterEqual; + break; + case BuiltinOperator::BuiltinOperator_GREATER: + param.comparison_type = ir::operation::Comparison::ComparisonType::Greater; + break; + case BuiltinOperator::BuiltinOperator_LESS_EQUAL: + param.comparison_type = ir::operation::Comparison::ComparisonType::LessEqual; + break; + case BuiltinOperator::BuiltinOperator_LESS: + param.comparison_type = ir::operation::Comparison::ComparisonType::Less; + break; + default: + throw std::runtime_error( + std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); + } + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Comparison(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::OneHot::Param param{}; + const auto *options = op->builtin_options_as_OneHotOptions(); + const auto axis = options->axis(); + const auto &indices = _graph.operands().at(inputs.at(ir::operation::OneHot::INDICES)); + auto indices_dims = indices.shape().rank(); + param.axis = (axis == -1) ? 
indices_dims : axis; + + std::unique_ptr<ir::Operation> new_op(new ir::operation::OneHot(inputs, outputs, param)); + _graph.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op) +{ + const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); + + switch (builtin_op) + { + case BuiltinOperator::BuiltinOperator_CONV_2D: + loadConv2D(op); + return; + case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D: + loadAvgPool2D(op); + return; + case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D: + loadDepthwiseConv2D(op); + return; + case BuiltinOperator::BuiltinOperator_TRANSPOSE_CONV: + loadTransposeConv(op); + return; + case BuiltinOperator::BuiltinOperator_RESHAPE: + loadReshape(op); + return; + case BuiltinOperator::BuiltinOperator_SOFTMAX: + loadSoftmax(op); + return; + case BuiltinOperator::BuiltinOperator_MAX_POOL_2D: + loadMaxPool2D(op); + return; + case BuiltinOperator::BuiltinOperator_CONCATENATION: + loadConcatenation(op); + return; + case BuiltinOperator::BuiltinOperator_FULLY_CONNECTED: + loadFC(op); + return; + case BuiltinOperator::BuiltinOperator_ADD: + loadAdd(op); + return; + case BuiltinOperator::BuiltinOperator_SUB: + loadSub(op); + return; + case BuiltinOperator::BuiltinOperator_MUL: + loadMul(op); + return; + case BuiltinOperator::BuiltinOperator_DIV: + loadDiv(op); + return; + case BuiltinOperator::BuiltinOperator_PACK: + loadPack(op); + return; + case BuiltinOperator::BuiltinOperator_RELU: + loadRelu(op); + return; + case BuiltinOperator::BuiltinOperator_RELU6: + loadRelu6(op); + return; + case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR: + loadResizeBilinear(op); + return; + case BuiltinOperator::BuiltinOperator_RSQRT: + loadRsqrt(op); + return; + case BuiltinOperator::BuiltinOperator_SQRT: + loadSqrt(op); + return; + case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE: + 
loadSquaredDifference(op); + return; + case BuiltinOperator::BuiltinOperator_TANH: + loadTanh(op); + return; + case BuiltinOperator::BuiltinOperator_TRANSPOSE: + loadTranspose(op); + return; + case BuiltinOperator::BuiltinOperator_MEAN: + loadMean(op); + return; + case BuiltinOperator::BuiltinOperator_REDUCE_MAX: + loadReduceMax(op); + return; + case BuiltinOperator::BuiltinOperator_PAD: + loadPad(op); + return; + case BuiltinOperator::BuiltinOperator_LOGISTIC: + loadLogistic(op); + return; + case BuiltinOperator::BuiltinOperator_EXP: + loadExp(op); + return; + case BuiltinOperator::BuiltinOperator_GATHER: + loadGather(op); + return; + case BuiltinOperator::BuiltinOperator_SPACE_TO_BATCH_ND: + loadSpaceToBatchND(op); + return; + case BuiltinOperator::BuiltinOperator_BATCH_TO_SPACE_ND: + loadBatchToSpaceND(op); + return; + case BuiltinOperator::BuiltinOperator_SUM: + loadReduceSum(op); + return; + case BuiltinOperator::BuiltinOperator_CUSTOM: + loadCustom(op); + return; + case BuiltinOperator::BuiltinOperator_SQUEEZE: + loadSqueeze(op); + return; + case BuiltinOperator::BuiltinOperator_PRELU: + loadPrelu(op); + return; + case BuiltinOperator::BuiltinOperator_SPLIT: + loadSplit(op); + return; + case BuiltinOperator::BuiltinOperator_SLICE: + loadSlice(op); + return; + case BuiltinOperator::BuiltinOperator_STRIDED_SLICE: + loadStridedSlice(op); + return; + case BuiltinOperator::BuiltinOperator_UNPACK: + loadUnpack(op); + return; + case BuiltinOperator::BuiltinOperator_MINIMUM: + loadMinimum(op); + return; + case BuiltinOperator::BuiltinOperator_MAXIMUM: + loadMaximum(op); + return; + case BuiltinOperator::BuiltinOperator_CAST: + loadCast(op); + return; + case BuiltinOperator::BuiltinOperator_EQUAL: + case BuiltinOperator::BuiltinOperator_GREATER_EQUAL: + case BuiltinOperator::BuiltinOperator_GREATER: + case BuiltinOperator::BuiltinOperator_LESS_EQUAL: + case BuiltinOperator::BuiltinOperator_LESS: + loadComparison(op); + return; + case 
BuiltinOperator::BuiltinOperator_ONE_HOT: + loadOneHot(op); + return; + default: + throw std::runtime_error( + std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); + } +} + +template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadModel() +{ + LoaderDomain::VerifyModelBuffer(*_verifier.get()); + _model = LoaderDomain::GetModel(_buffer.data()); + // Version unused + // const auto version = _model->version(); + // Description unused + // const auto *description = _model->description(); + // Metabuffer unsued + // const auto *metadata_buffer = _model->metadata_buffer(); + // Load subgraphs and mapping from op to subgraph + for (const auto *subgraph : *_model->subgraphs()) + { + static_cast<SpecificLoader *>(this)->loadSubgraph(subgraph); + } + + _graph.finishBuilding(); +} + +} // namespace base_loader +} // namespace neurun + +#endif //__BASE_LOADER_BASE_LOADER_H__ diff --git a/runtime/neurun/frontend/circle/CMakeLists.txt b/runtime/neurun/frontend/circle/CMakeLists.txt new file mode 100644 index 000000000..fea9725c2 --- /dev/null +++ b/runtime/neurun/frontend/circle/CMakeLists.txt @@ -0,0 +1,17 @@ +if (NOT BUILD_CIRCLE_LOADER) + return() +endif () + +nnfw_find_package(FlatBuffersSource REQUIRED) + +set(CIRCLE_LOADER_SOURCES src/circle_loader.cc) + +add_library(circle_loader SHARED ${CIRCLE_LOADER_SOURCES}) + +target_include_directories(circle_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_include_directories(circle_loader PRIVATE ${FlatBuffersSource_DIR}/include) + +target_link_libraries(circle_loader PUBLIC neurun_core) +target_link_libraries(circle_loader PRIVATE base_loader nnfw_lib_cpp14 nnfw_common nnfw_coverage) + +install(TARGETS circle_loader DESTINATION lib) diff --git a/runtime/neurun/frontend/circle/include/circle_loader.h b/runtime/neurun/frontend/circle/include/circle_loader.h new file mode 100644 index 000000000..898bd32b1 --- /dev/null +++ 
b/runtime/neurun/frontend/circle/include/circle_loader.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_CIRCLE_LOADER_H__ +#define __CIRCLE_CIRCLE_LOADER_H__ + +#include "ir/Graph.h" + +#include <memory> + +namespace neurun +{ +namespace circle_loader +{ +std::unique_ptr<ir::Graph> loadModel(const char *filename); +} // namespace circle_loader +} // namespace neurun + +#endif // __CIRCLE_CIRCLE_LOADER_H__ diff --git a/runtime/neurun/frontend/circle/src/circle_loader.cc b/runtime/neurun/frontend/circle/src/circle_loader.cc new file mode 100644 index 000000000..cc48a793d --- /dev/null +++ b/runtime/neurun/frontend/circle/src/circle_loader.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "circle_loader.h" +#include "base_loader.h" +#include "circle_schema_generated.h" + +namespace neurun +{ +namespace circle_loader +{ + +namespace +{ + +struct LoaderDomain +{ + using Verifier = flatbuffers::Verifier; + using ActivationFunctionType = circle::ActivationFunctionType; + using Buffer = circle::Buffer; + using BuiltinOperator = circle::BuiltinOperator; + using CustomOptionsFormat = circle::CustomOptionsFormat; + using Model = circle::Model; + using Operator = circle::Operator; + using Padding = circle::Padding; + using Pool2DOptions = circle::Pool2DOptions; + using Tensor = circle::Tensor; + using TensorType = circle::TensorType; + using SubGraph = circle::SubGraph; + + static const char *EnumNameBuiltinOperator(BuiltinOperator e) + { + return circle::EnumNameBuiltinOperator(e); + } + static const char *EnumNameActivationFunctionType(ActivationFunctionType e) + { + return circle::EnumNameActivationFunctionType(e); + } + static const char *EnumNameTensorType(TensorType e) { return circle::EnumNameTensorType(e); } + static const Model *GetModel(const void *buf) { return circle::GetModel(buf); } + static bool VerifyModelBuffer(Verifier &verifier) { return circle::VerifyModelBuffer(verifier); } +}; + +class CircleLoader final : public base_loader::BaseLoader<LoaderDomain, CircleLoader> +{ +public: + using BaseLoader::BaseLoader; + + void loadSubgraph(const circle::SubGraph *subgraph) + { + // Load tensors + _tensor_to_operand.resize(subgraph->tensors()->size()); + for (flatbuffers::uoffset_t i = 0; i < subgraph->tensors()->size(); ++i) + { + _tensor_to_operand[i] = loadOperand(subgraph->tensors()->Get(i)); + } + // Set inputs + for (const std::int32_t input_ind : *subgraph->inputs()) + { + _graph.addInput(_tensor_to_operand[input_ind]); + } + // Set outputs + for (const std::int32_t output_ind : *subgraph->outputs()) + { + _graph.addOutput(_tensor_to_operand[output_ind]); + } + // Create operations + for (const auto *op : 
*subgraph->operators()) + { + CircleLoader::loadOperation(op); + } + + (void)subgraph->data_format(); + } + + void loadOperation(const circle::Operator *op) + { + const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); + + switch (builtin_op) + { + case circle::BuiltinOperator::BuiltinOperator_INSTANCE_NORM: + loadInstanceNorm(op); + return; + default: + BaseLoader::loadOperation(op); + return; + } + } +}; + +} // namespace + +std::unique_ptr<ir::Graph> loadModel(const char *filename) +{ + auto graph = nnfw::cpp14::make_unique<ir::Graph>(); + CircleLoader loader(*graph); + loader.loadFromFile(filename); + return graph; +} + +} // namespace circle_loader +} // namespace neurun diff --git a/runtime/neurun/frontend/circle/src/circle_schema_generated.h b/runtime/neurun/frontend/circle/src/circle_schema_generated.h new file mode 100644 index 000000000..5f7aad462 --- /dev/null +++ b/runtime/neurun/frontend/circle/src/circle_schema_generated.h @@ -0,0 +1,7546 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_ +#define FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace circle +{ + +struct CustomQuantization; + +struct QuantizationParameters; + +struct Tensor; + +struct Conv2DOptions; + +struct Pool2DOptions; + +struct DepthwiseConv2DOptions; + +struct ConcatEmbeddingsOptions; + +struct LSHProjectionOptions; + +struct SVDFOptions; + +struct RNNOptions; + +struct SequenceRNNOptions; + +struct BidirectionalSequenceRNNOptions; + +struct FullyConnectedOptions; + +struct SoftmaxOptions; + +struct ConcatenationOptions; + +struct AddOptions; + +struct MulOptions; + +struct L2NormOptions; + +struct LocalResponseNormalizationOptions; + +struct LSTMOptions; + +struct UnidirectionalSequenceLSTMOptions; + +struct BidirectionalSequenceLSTMOptions; + +struct ResizeBilinearOptions; + +struct ResizeNearestNeighborOptions; + +struct CallOptions; + +struct PadOptions; + +struct PadV2Options; + +struct ReshapeOptions; + +struct SpaceToBatchNDOptions; + +struct BatchToSpaceNDOptions; + +struct SkipGramOptions; + +struct SpaceToDepthOptions; + +struct SubOptions; + +struct DivOptions; + +struct TopKV2Options; + +struct EmbeddingLookupSparseOptions; + +struct GatherOptions; + +struct TransposeOptions; + +struct ExpOptions; + +struct ReducerOptions; + +struct SqueezeOptions; + +struct SplitOptions; + +struct SplitVOptions; + +struct StridedSliceOptions; + +struct LogSoftmaxOptions; + +struct CastOptions; + +struct DequantizeOptions; + +struct MaximumMinimumOptions; + +struct TileOptions; + +struct ArgMaxOptions; + +struct ArgMinOptions; + +struct GreaterOptions; + +struct GreaterEqualOptions; + +struct LessOptions; + +struct LessEqualOptions; + +struct NegOptions; + +struct SelectOptions; + +struct SliceOptions; + +struct TransposeConvOptions; + +struct ExpandDimsOptions; + +struct SparseToDenseOptions; + +struct 
EqualOptions; + +struct NotEqualOptions; + +struct ShapeOptions; + +struct PowOptions; + +struct FakeQuantOptions; + +struct PackOptions; + +struct LogicalOrOptions; + +struct OneHotOptions; + +struct AbsOptions; + +struct LogicalAndOptions; + +struct LogicalNotOptions; + +struct UnpackOptions; + +struct FloorDivOptions; + +struct SquareOptions; + +struct ZerosLikeOptions; + +struct FillOptions; + +struct FloorModOptions; + +struct RangeOptions; + +struct LeakyReluOptions; + +struct SquaredDifferenceOptions; + +struct MirrorPadOptions; + +struct InstanceNormOptions; + +struct OperatorCode; + +struct Operator; + +struct SubGraph; + +struct Buffer; + +struct Model; + +enum TensorType +{ + TensorType_FLOAT32 = 0, + TensorType_FLOAT16 = 1, + TensorType_INT32 = 2, + TensorType_UINT8 = 3, + TensorType_INT64 = 4, + TensorType_STRING = 5, + TensorType_BOOL = 6, + TensorType_INT16 = 7, + TensorType_COMPLEX64 = 8, + TensorType_INT8 = 9, + TensorType_MIN = TensorType_FLOAT32, + TensorType_MAX = TensorType_INT8 +}; + +inline const TensorType (&EnumValuesTensorType())[10] +{ + static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, + TensorType_UINT8, TensorType_INT64, TensorType_STRING, + TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64, + TensorType_INT8}; + return values; +} + +inline const char *const *EnumNamesTensorType() +{ + static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64", "STRING", + "BOOL", "INT16", "COMPLEX64", "INT8", nullptr}; + return names; +} + +inline const char *EnumNameTensorType(TensorType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesTensorType()[index]; +} + +enum QuantizationDetails +{ + QuantizationDetails_NONE = 0, + QuantizationDetails_CustomQuantization = 1, + QuantizationDetails_MIN = QuantizationDetails_NONE, + QuantizationDetails_MAX = QuantizationDetails_CustomQuantization +}; + +inline const QuantizationDetails 
(&EnumValuesQuantizationDetails())[2] +{ + static const QuantizationDetails values[] = {QuantizationDetails_NONE, + QuantizationDetails_CustomQuantization}; + return values; +} + +inline const char *const *EnumNamesQuantizationDetails() +{ + static const char *const names[] = {"NONE", "CustomQuantization", nullptr}; + return names; +} + +inline const char *EnumNameQuantizationDetails(QuantizationDetails e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesQuantizationDetails()[index]; +} + +template <typename T> struct QuantizationDetailsTraits +{ + static const QuantizationDetails enum_value = QuantizationDetails_NONE; +}; + +template <> struct QuantizationDetailsTraits<CustomQuantization> +{ + static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization; +}; + +bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, + QuantizationDetails type); +bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, + const flatbuffers::Vector<flatbuffers::Offset<void>> *values, + const flatbuffers::Vector<uint8_t> *types); + +enum BuiltinOperator +{ + BuiltinOperator_ADD = 0, + BuiltinOperator_AVERAGE_POOL_2D = 1, + BuiltinOperator_CONCATENATION = 2, + BuiltinOperator_CONV_2D = 3, + BuiltinOperator_DEPTHWISE_CONV_2D = 4, + BuiltinOperator_DEQUANTIZE = 6, + BuiltinOperator_EMBEDDING_LOOKUP = 7, + BuiltinOperator_FLOOR = 8, + BuiltinOperator_FULLY_CONNECTED = 9, + BuiltinOperator_HASHTABLE_LOOKUP = 10, + BuiltinOperator_L2_NORMALIZATION = 11, + BuiltinOperator_L2_POOL_2D = 12, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13, + BuiltinOperator_LOGISTIC = 14, + BuiltinOperator_LSH_PROJECTION = 15, + BuiltinOperator_LSTM = 16, + BuiltinOperator_MAX_POOL_2D = 17, + BuiltinOperator_MUL = 18, + BuiltinOperator_RELU = 19, + BuiltinOperator_RELU_N1_TO_1 = 20, + BuiltinOperator_RELU6 = 21, + BuiltinOperator_RESHAPE = 22, + BuiltinOperator_RESIZE_BILINEAR = 23, + BuiltinOperator_RNN = 24, + 
BuiltinOperator_SOFTMAX = 25, + BuiltinOperator_SPACE_TO_DEPTH = 26, + BuiltinOperator_SVDF = 27, + BuiltinOperator_TANH = 28, + BuiltinOperator_CONCAT_EMBEDDINGS = 29, + BuiltinOperator_SKIP_GRAM = 30, + BuiltinOperator_CALL = 31, + BuiltinOperator_CUSTOM = 32, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33, + BuiltinOperator_PAD = 34, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35, + BuiltinOperator_GATHER = 36, + BuiltinOperator_BATCH_TO_SPACE_ND = 37, + BuiltinOperator_SPACE_TO_BATCH_ND = 38, + BuiltinOperator_TRANSPOSE = 39, + BuiltinOperator_MEAN = 40, + BuiltinOperator_SUB = 41, + BuiltinOperator_DIV = 42, + BuiltinOperator_SQUEEZE = 43, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + BuiltinOperator_STRIDED_SLICE = 45, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46, + BuiltinOperator_EXP = 47, + BuiltinOperator_TOPK_V2 = 48, + BuiltinOperator_SPLIT = 49, + BuiltinOperator_LOG_SOFTMAX = 50, + BuiltinOperator_DELEGATE = 51, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, + BuiltinOperator_CAST = 53, + BuiltinOperator_PRELU = 54, + BuiltinOperator_MAXIMUM = 55, + BuiltinOperator_ARG_MAX = 56, + BuiltinOperator_MINIMUM = 57, + BuiltinOperator_LESS = 58, + BuiltinOperator_NEG = 59, + BuiltinOperator_PADV2 = 60, + BuiltinOperator_GREATER = 61, + BuiltinOperator_GREATER_EQUAL = 62, + BuiltinOperator_LESS_EQUAL = 63, + BuiltinOperator_SELECT = 64, + BuiltinOperator_SLICE = 65, + BuiltinOperator_SIN = 66, + BuiltinOperator_TRANSPOSE_CONV = 67, + BuiltinOperator_SPARSE_TO_DENSE = 68, + BuiltinOperator_TILE = 69, + BuiltinOperator_EXPAND_DIMS = 70, + BuiltinOperator_EQUAL = 71, + BuiltinOperator_NOT_EQUAL = 72, + BuiltinOperator_LOG = 73, + BuiltinOperator_SUM = 74, + BuiltinOperator_SQRT = 75, + BuiltinOperator_RSQRT = 76, + BuiltinOperator_SHAPE = 77, + BuiltinOperator_POW = 78, + BuiltinOperator_ARG_MIN = 79, + BuiltinOperator_FAKE_QUANT = 80, + BuiltinOperator_REDUCE_PROD = 81, + BuiltinOperator_REDUCE_MAX = 82, + BuiltinOperator_PACK = 83, + 
BuiltinOperator_LOGICAL_OR = 84, + BuiltinOperator_ONE_HOT = 85, + BuiltinOperator_LOGICAL_AND = 86, + BuiltinOperator_LOGICAL_NOT = 87, + BuiltinOperator_UNPACK = 88, + BuiltinOperator_REDUCE_MIN = 89, + BuiltinOperator_FLOOR_DIV = 90, + BuiltinOperator_REDUCE_ANY = 91, + BuiltinOperator_SQUARE = 92, + BuiltinOperator_ZEROS_LIKE = 93, + BuiltinOperator_FILL = 94, + BuiltinOperator_FLOOR_MOD = 95, + BuiltinOperator_RANGE = 96, + BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97, + BuiltinOperator_LEAKY_RELU = 98, + BuiltinOperator_SQUARED_DIFFERENCE = 99, + BuiltinOperator_MIRROR_PAD = 100, + BuiltinOperator_ABS = 101, + BuiltinOperator_SPLIT_V = 102, + BuiltinOperator_INSTANCE_NORM = 254, + BuiltinOperator_MIN = BuiltinOperator_ADD, + BuiltinOperator_MAX = BuiltinOperator_INSTANCE_NORM +}; + +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[103] +{ + static const BuiltinOperator values[] = {BuiltinOperator_ADD, + BuiltinOperator_AVERAGE_POOL_2D, + BuiltinOperator_CONCATENATION, + BuiltinOperator_CONV_2D, + BuiltinOperator_DEPTHWISE_CONV_2D, + BuiltinOperator_DEQUANTIZE, + BuiltinOperator_EMBEDDING_LOOKUP, + BuiltinOperator_FLOOR, + BuiltinOperator_FULLY_CONNECTED, + BuiltinOperator_HASHTABLE_LOOKUP, + BuiltinOperator_L2_NORMALIZATION, + BuiltinOperator_L2_POOL_2D, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + BuiltinOperator_LOGISTIC, + BuiltinOperator_LSH_PROJECTION, + BuiltinOperator_LSTM, + BuiltinOperator_MAX_POOL_2D, + BuiltinOperator_MUL, + BuiltinOperator_RELU, + BuiltinOperator_RELU_N1_TO_1, + BuiltinOperator_RELU6, + BuiltinOperator_RESHAPE, + BuiltinOperator_RESIZE_BILINEAR, + BuiltinOperator_RNN, + BuiltinOperator_SOFTMAX, + BuiltinOperator_SPACE_TO_DEPTH, + BuiltinOperator_SVDF, + BuiltinOperator_TANH, + BuiltinOperator_CONCAT_EMBEDDINGS, + BuiltinOperator_SKIP_GRAM, + BuiltinOperator_CALL, + BuiltinOperator_CUSTOM, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, + BuiltinOperator_PAD, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, + 
BuiltinOperator_GATHER, + BuiltinOperator_BATCH_TO_SPACE_ND, + BuiltinOperator_SPACE_TO_BATCH_ND, + BuiltinOperator_TRANSPOSE, + BuiltinOperator_MEAN, + BuiltinOperator_SUB, + BuiltinOperator_DIV, + BuiltinOperator_SQUEEZE, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_STRIDED_SLICE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_EXP, + BuiltinOperator_TOPK_V2, + BuiltinOperator_SPLIT, + BuiltinOperator_LOG_SOFTMAX, + BuiltinOperator_DELEGATE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_CAST, + BuiltinOperator_PRELU, + BuiltinOperator_MAXIMUM, + BuiltinOperator_ARG_MAX, + BuiltinOperator_MINIMUM, + BuiltinOperator_LESS, + BuiltinOperator_NEG, + BuiltinOperator_PADV2, + BuiltinOperator_GREATER, + BuiltinOperator_GREATER_EQUAL, + BuiltinOperator_LESS_EQUAL, + BuiltinOperator_SELECT, + BuiltinOperator_SLICE, + BuiltinOperator_SIN, + BuiltinOperator_TRANSPOSE_CONV, + BuiltinOperator_SPARSE_TO_DENSE, + BuiltinOperator_TILE, + BuiltinOperator_EXPAND_DIMS, + BuiltinOperator_EQUAL, + BuiltinOperator_NOT_EQUAL, + BuiltinOperator_LOG, + BuiltinOperator_SUM, + BuiltinOperator_SQRT, + BuiltinOperator_RSQRT, + BuiltinOperator_SHAPE, + BuiltinOperator_POW, + BuiltinOperator_ARG_MIN, + BuiltinOperator_FAKE_QUANT, + BuiltinOperator_REDUCE_PROD, + BuiltinOperator_REDUCE_MAX, + BuiltinOperator_PACK, + BuiltinOperator_LOGICAL_OR, + BuiltinOperator_ONE_HOT, + BuiltinOperator_LOGICAL_AND, + BuiltinOperator_LOGICAL_NOT, + BuiltinOperator_UNPACK, + BuiltinOperator_REDUCE_MIN, + BuiltinOperator_FLOOR_DIV, + BuiltinOperator_REDUCE_ANY, + BuiltinOperator_SQUARE, + BuiltinOperator_ZEROS_LIKE, + BuiltinOperator_FILL, + BuiltinOperator_FLOOR_MOD, + BuiltinOperator_RANGE, + BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + BuiltinOperator_LEAKY_RELU, + BuiltinOperator_SQUARED_DIFFERENCE, + BuiltinOperator_MIRROR_PAD, + BuiltinOperator_ABS, + BuiltinOperator_SPLIT_V, + BuiltinOperator_INSTANCE_NORM}; + return values; +} + +inline const char 
*const *EnumNamesBuiltinOperator() +{ + static const char *const names[] = {"ADD", + "AVERAGE_POOL_2D", + "CONCATENATION", + "CONV_2D", + "DEPTHWISE_CONV_2D", + "", + "DEQUANTIZE", + "EMBEDDING_LOOKUP", + "FLOOR", + "FULLY_CONNECTED", + "HASHTABLE_LOOKUP", + "L2_NORMALIZATION", + "L2_POOL_2D", + "LOCAL_RESPONSE_NORMALIZATION", + "LOGISTIC", + "LSH_PROJECTION", + "LSTM", + "MAX_POOL_2D", + "MUL", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "RESHAPE", + "RESIZE_BILINEAR", + "RNN", + "SOFTMAX", + "SPACE_TO_DEPTH", + "SVDF", + "TANH", + "CONCAT_EMBEDDINGS", + "SKIP_GRAM", + "CALL", + "CUSTOM", + "EMBEDDING_LOOKUP_SPARSE", + "PAD", + "UNIDIRECTIONAL_SEQUENCE_RNN", + "GATHER", + "BATCH_TO_SPACE_ND", + "SPACE_TO_BATCH_ND", + "TRANSPOSE", + "MEAN", + "SUB", + "DIV", + "SQUEEZE", + "UNIDIRECTIONAL_SEQUENCE_LSTM", + "STRIDED_SLICE", + "BIDIRECTIONAL_SEQUENCE_RNN", + "EXP", + "TOPK_V2", + "SPLIT", + "LOG_SOFTMAX", + "DELEGATE", + "BIDIRECTIONAL_SEQUENCE_LSTM", + "CAST", + "PRELU", + "MAXIMUM", + "ARG_MAX", + "MINIMUM", + "LESS", + "NEG", + "PADV2", + "GREATER", + "GREATER_EQUAL", + "LESS_EQUAL", + "SELECT", + "SLICE", + "SIN", + "TRANSPOSE_CONV", + "SPARSE_TO_DENSE", + "TILE", + "EXPAND_DIMS", + "EQUAL", + "NOT_EQUAL", + "LOG", + "SUM", + "SQRT", + "RSQRT", + "SHAPE", + "POW", + "ARG_MIN", + "FAKE_QUANT", + "REDUCE_PROD", + "REDUCE_MAX", + "PACK", + "LOGICAL_OR", + "ONE_HOT", + "LOGICAL_AND", + "LOGICAL_NOT", + "UNPACK", + "REDUCE_MIN", + "FLOOR_DIV", + "REDUCE_ANY", + "SQUARE", + "ZEROS_LIKE", + "FILL", + "FLOOR_MOD", + "RANGE", + "RESIZE_NEAREST_NEIGHBOR", + "LEAKY_RELU", + "SQUARED_DIFFERENCE", + "MIRROR_PAD", + "ABS", + "SPLIT_V", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "INSTANCE_NORM", + nullptr}; + return names; +} + +inline const char *EnumNameBuiltinOperator(BuiltinOperator e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesBuiltinOperator()[index]; +} + +enum BuiltinOptions +{ + BuiltinOptions_NONE = 0, + BuiltinOptions_Conv2DOptions = 1, + BuiltinOptions_DepthwiseConv2DOptions = 2, + BuiltinOptions_ConcatEmbeddingsOptions = 3, + BuiltinOptions_LSHProjectionOptions = 4, + BuiltinOptions_Pool2DOptions = 5, + BuiltinOptions_SVDFOptions = 6, + BuiltinOptions_RNNOptions = 7, + BuiltinOptions_FullyConnectedOptions = 8, + BuiltinOptions_SoftmaxOptions = 9, + BuiltinOptions_ConcatenationOptions = 10, + BuiltinOptions_AddOptions = 11, + BuiltinOptions_L2NormOptions = 12, + BuiltinOptions_LocalResponseNormalizationOptions = 13, + BuiltinOptions_LSTMOptions = 14, + BuiltinOptions_ResizeBilinearOptions = 15, + BuiltinOptions_CallOptions = 16, + BuiltinOptions_ReshapeOptions = 17, + BuiltinOptions_SkipGramOptions = 18, + BuiltinOptions_SpaceToDepthOptions = 19, + BuiltinOptions_EmbeddingLookupSparseOptions = 20, + BuiltinOptions_MulOptions = 21, + BuiltinOptions_PadOptions = 22, + BuiltinOptions_GatherOptions = 23, + BuiltinOptions_BatchToSpaceNDOptions = 24, + BuiltinOptions_SpaceToBatchNDOptions = 25, + BuiltinOptions_TransposeOptions = 26, + BuiltinOptions_ReducerOptions = 27, + BuiltinOptions_SubOptions = 28, + BuiltinOptions_DivOptions = 29, + 
BuiltinOptions_SqueezeOptions = 30, + BuiltinOptions_SequenceRNNOptions = 31, + BuiltinOptions_StridedSliceOptions = 32, + BuiltinOptions_ExpOptions = 33, + BuiltinOptions_TopKV2Options = 34, + BuiltinOptions_SplitOptions = 35, + BuiltinOptions_LogSoftmaxOptions = 36, + BuiltinOptions_CastOptions = 37, + BuiltinOptions_DequantizeOptions = 38, + BuiltinOptions_MaximumMinimumOptions = 39, + BuiltinOptions_ArgMaxOptions = 40, + BuiltinOptions_LessOptions = 41, + BuiltinOptions_NegOptions = 42, + BuiltinOptions_PadV2Options = 43, + BuiltinOptions_GreaterOptions = 44, + BuiltinOptions_GreaterEqualOptions = 45, + BuiltinOptions_LessEqualOptions = 46, + BuiltinOptions_SelectOptions = 47, + BuiltinOptions_SliceOptions = 48, + BuiltinOptions_TransposeConvOptions = 49, + BuiltinOptions_SparseToDenseOptions = 50, + BuiltinOptions_TileOptions = 51, + BuiltinOptions_ExpandDimsOptions = 52, + BuiltinOptions_EqualOptions = 53, + BuiltinOptions_NotEqualOptions = 54, + BuiltinOptions_ShapeOptions = 55, + BuiltinOptions_PowOptions = 56, + BuiltinOptions_ArgMinOptions = 57, + BuiltinOptions_FakeQuantOptions = 58, + BuiltinOptions_PackOptions = 59, + BuiltinOptions_LogicalOrOptions = 60, + BuiltinOptions_OneHotOptions = 61, + BuiltinOptions_LogicalAndOptions = 62, + BuiltinOptions_LogicalNotOptions = 63, + BuiltinOptions_UnpackOptions = 64, + BuiltinOptions_FloorDivOptions = 65, + BuiltinOptions_SquareOptions = 66, + BuiltinOptions_ZerosLikeOptions = 67, + BuiltinOptions_FillOptions = 68, + BuiltinOptions_BidirectionalSequenceLSTMOptions = 69, + BuiltinOptions_BidirectionalSequenceRNNOptions = 70, + BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71, + BuiltinOptions_FloorModOptions = 72, + BuiltinOptions_RangeOptions = 73, + BuiltinOptions_ResizeNearestNeighborOptions = 74, + BuiltinOptions_LeakyReluOptions = 75, + BuiltinOptions_SquaredDifferenceOptions = 76, + BuiltinOptions_MirrorPadOptions = 77, + BuiltinOptions_AbsOptions = 78, + BuiltinOptions_SplitVOptions = 79, + 
BuiltinOptions_InstanceNormOptions = 80, + BuiltinOptions_MIN = BuiltinOptions_NONE, + BuiltinOptions_MAX = BuiltinOptions_InstanceNormOptions +}; + +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[81] +{ + static const BuiltinOptions values[] = {BuiltinOptions_NONE, + BuiltinOptions_Conv2DOptions, + BuiltinOptions_DepthwiseConv2DOptions, + BuiltinOptions_ConcatEmbeddingsOptions, + BuiltinOptions_LSHProjectionOptions, + BuiltinOptions_Pool2DOptions, + BuiltinOptions_SVDFOptions, + BuiltinOptions_RNNOptions, + BuiltinOptions_FullyConnectedOptions, + BuiltinOptions_SoftmaxOptions, + BuiltinOptions_ConcatenationOptions, + BuiltinOptions_AddOptions, + BuiltinOptions_L2NormOptions, + BuiltinOptions_LocalResponseNormalizationOptions, + BuiltinOptions_LSTMOptions, + BuiltinOptions_ResizeBilinearOptions, + BuiltinOptions_CallOptions, + BuiltinOptions_ReshapeOptions, + BuiltinOptions_SkipGramOptions, + BuiltinOptions_SpaceToDepthOptions, + BuiltinOptions_EmbeddingLookupSparseOptions, + BuiltinOptions_MulOptions, + BuiltinOptions_PadOptions, + BuiltinOptions_GatherOptions, + BuiltinOptions_BatchToSpaceNDOptions, + BuiltinOptions_SpaceToBatchNDOptions, + BuiltinOptions_TransposeOptions, + BuiltinOptions_ReducerOptions, + BuiltinOptions_SubOptions, + BuiltinOptions_DivOptions, + BuiltinOptions_SqueezeOptions, + BuiltinOptions_SequenceRNNOptions, + BuiltinOptions_StridedSliceOptions, + BuiltinOptions_ExpOptions, + BuiltinOptions_TopKV2Options, + BuiltinOptions_SplitOptions, + BuiltinOptions_LogSoftmaxOptions, + BuiltinOptions_CastOptions, + BuiltinOptions_DequantizeOptions, + BuiltinOptions_MaximumMinimumOptions, + BuiltinOptions_ArgMaxOptions, + BuiltinOptions_LessOptions, + BuiltinOptions_NegOptions, + BuiltinOptions_PadV2Options, + BuiltinOptions_GreaterOptions, + BuiltinOptions_GreaterEqualOptions, + BuiltinOptions_LessEqualOptions, + BuiltinOptions_SelectOptions, + BuiltinOptions_SliceOptions, + BuiltinOptions_TransposeConvOptions, + 
BuiltinOptions_SparseToDenseOptions, + BuiltinOptions_TileOptions, + BuiltinOptions_ExpandDimsOptions, + BuiltinOptions_EqualOptions, + BuiltinOptions_NotEqualOptions, + BuiltinOptions_ShapeOptions, + BuiltinOptions_PowOptions, + BuiltinOptions_ArgMinOptions, + BuiltinOptions_FakeQuantOptions, + BuiltinOptions_PackOptions, + BuiltinOptions_LogicalOrOptions, + BuiltinOptions_OneHotOptions, + BuiltinOptions_LogicalAndOptions, + BuiltinOptions_LogicalNotOptions, + BuiltinOptions_UnpackOptions, + BuiltinOptions_FloorDivOptions, + BuiltinOptions_SquareOptions, + BuiltinOptions_ZerosLikeOptions, + BuiltinOptions_FillOptions, + BuiltinOptions_BidirectionalSequenceLSTMOptions, + BuiltinOptions_BidirectionalSequenceRNNOptions, + BuiltinOptions_UnidirectionalSequenceLSTMOptions, + BuiltinOptions_FloorModOptions, + BuiltinOptions_RangeOptions, + BuiltinOptions_ResizeNearestNeighborOptions, + BuiltinOptions_LeakyReluOptions, + BuiltinOptions_SquaredDifferenceOptions, + BuiltinOptions_MirrorPadOptions, + BuiltinOptions_AbsOptions, + BuiltinOptions_SplitVOptions, + BuiltinOptions_InstanceNormOptions}; + return values; +} + +inline const char *const *EnumNamesBuiltinOptions() +{ + static const char *const names[] = {"NONE", + "Conv2DOptions", + "DepthwiseConv2DOptions", + "ConcatEmbeddingsOptions", + "LSHProjectionOptions", + "Pool2DOptions", + "SVDFOptions", + "RNNOptions", + "FullyConnectedOptions", + "SoftmaxOptions", + "ConcatenationOptions", + "AddOptions", + "L2NormOptions", + "LocalResponseNormalizationOptions", + "LSTMOptions", + "ResizeBilinearOptions", + "CallOptions", + "ReshapeOptions", + "SkipGramOptions", + "SpaceToDepthOptions", + "EmbeddingLookupSparseOptions", + "MulOptions", + "PadOptions", + "GatherOptions", + "BatchToSpaceNDOptions", + "SpaceToBatchNDOptions", + "TransposeOptions", + "ReducerOptions", + "SubOptions", + "DivOptions", + "SqueezeOptions", + "SequenceRNNOptions", + "StridedSliceOptions", + "ExpOptions", + "TopKV2Options", + "SplitOptions", + 
"LogSoftmaxOptions", + "CastOptions", + "DequantizeOptions", + "MaximumMinimumOptions", + "ArgMaxOptions", + "LessOptions", + "NegOptions", + "PadV2Options", + "GreaterOptions", + "GreaterEqualOptions", + "LessEqualOptions", + "SelectOptions", + "SliceOptions", + "TransposeConvOptions", + "SparseToDenseOptions", + "TileOptions", + "ExpandDimsOptions", + "EqualOptions", + "NotEqualOptions", + "ShapeOptions", + "PowOptions", + "ArgMinOptions", + "FakeQuantOptions", + "PackOptions", + "LogicalOrOptions", + "OneHotOptions", + "LogicalAndOptions", + "LogicalNotOptions", + "UnpackOptions", + "FloorDivOptions", + "SquareOptions", + "ZerosLikeOptions", + "FillOptions", + "BidirectionalSequenceLSTMOptions", + "BidirectionalSequenceRNNOptions", + "UnidirectionalSequenceLSTMOptions", + "FloorModOptions", + "RangeOptions", + "ResizeNearestNeighborOptions", + "LeakyReluOptions", + "SquaredDifferenceOptions", + "MirrorPadOptions", + "AbsOptions", + "SplitVOptions", + "InstanceNormOptions", + nullptr}; + return names; +} + +inline const char *EnumNameBuiltinOptions(BuiltinOptions e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesBuiltinOptions()[index]; +} + +template <typename T> struct BuiltinOptionsTraits +{ + static const BuiltinOptions enum_value = BuiltinOptions_NONE; +}; + +template <> struct BuiltinOptionsTraits<Conv2DOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions; +}; + +template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions; +}; + +template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions; +}; + +template <> struct BuiltinOptionsTraits<LSHProjectionOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions; +}; + +template <> struct BuiltinOptionsTraits<Pool2DOptions> +{ + static const BuiltinOptions 
enum_value = BuiltinOptions_Pool2DOptions; +}; + +template <> struct BuiltinOptionsTraits<SVDFOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions; +}; + +template <> struct BuiltinOptionsTraits<RNNOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions; +}; + +template <> struct BuiltinOptionsTraits<FullyConnectedOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions; +}; + +template <> struct BuiltinOptionsTraits<SoftmaxOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions; +}; + +template <> struct BuiltinOptionsTraits<ConcatenationOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions; +}; + +template <> struct BuiltinOptionsTraits<AddOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_AddOptions; +}; + +template <> struct BuiltinOptionsTraits<L2NormOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions; +}; + +template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions; +}; + +template <> struct BuiltinOptionsTraits<LSTMOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions; +}; + +template <> struct BuiltinOptionsTraits<ResizeBilinearOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions; +}; + +template <> struct BuiltinOptionsTraits<CallOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_CallOptions; +}; + +template <> struct BuiltinOptionsTraits<ReshapeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions; +}; + +template <> struct BuiltinOptionsTraits<SkipGramOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions; +}; + +template <> struct BuiltinOptionsTraits<SpaceToDepthOptions> +{ + static const 
BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions; +}; + +template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions; +}; + +template <> struct BuiltinOptionsTraits<MulOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_MulOptions; +}; + +template <> struct BuiltinOptionsTraits<PadOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_PadOptions; +}; + +template <> struct BuiltinOptionsTraits<GatherOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions; +}; + +template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions; +}; + +template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions; +}; + +template <> struct BuiltinOptionsTraits<TransposeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions; +}; + +template <> struct BuiltinOptionsTraits<ReducerOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions; +}; + +template <> struct BuiltinOptionsTraits<SubOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SubOptions; +}; + +template <> struct BuiltinOptionsTraits<DivOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_DivOptions; +}; + +template <> struct BuiltinOptionsTraits<SqueezeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions; +}; + +template <> struct BuiltinOptionsTraits<SequenceRNNOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions; +}; + +template <> struct BuiltinOptionsTraits<StridedSliceOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions; +}; + +template <> struct BuiltinOptionsTraits<ExpOptions> +{ + static 
const BuiltinOptions enum_value = BuiltinOptions_ExpOptions; +}; + +template <> struct BuiltinOptionsTraits<TopKV2Options> +{ + static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; +}; + +template <> struct BuiltinOptionsTraits<SplitOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions; +}; + +template <> struct BuiltinOptionsTraits<LogSoftmaxOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions; +}; + +template <> struct BuiltinOptionsTraits<CastOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_CastOptions; +}; + +template <> struct BuiltinOptionsTraits<DequantizeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; +}; + +template <> struct BuiltinOptionsTraits<MaximumMinimumOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions; +}; + +template <> struct BuiltinOptionsTraits<ArgMaxOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions; +}; + +template <> struct BuiltinOptionsTraits<LessOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LessOptions; +}; + +template <> struct BuiltinOptionsTraits<NegOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_NegOptions; +}; + +template <> struct BuiltinOptionsTraits<PadV2Options> +{ + static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options; +}; + +template <> struct BuiltinOptionsTraits<GreaterOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions; +}; + +template <> struct BuiltinOptionsTraits<GreaterEqualOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions; +}; + +template <> struct BuiltinOptionsTraits<LessEqualOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions; +}; + +template <> struct BuiltinOptionsTraits<SelectOptions> +{ + static const BuiltinOptions enum_value = 
BuiltinOptions_SelectOptions; +}; + +template <> struct BuiltinOptionsTraits<SliceOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions; +}; + +template <> struct BuiltinOptionsTraits<TransposeConvOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions; +}; + +template <> struct BuiltinOptionsTraits<SparseToDenseOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions; +}; + +template <> struct BuiltinOptionsTraits<TileOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_TileOptions; +}; + +template <> struct BuiltinOptionsTraits<ExpandDimsOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions; +}; + +template <> struct BuiltinOptionsTraits<EqualOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions; +}; + +template <> struct BuiltinOptionsTraits<NotEqualOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions; +}; + +template <> struct BuiltinOptionsTraits<ShapeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions; +}; + +template <> struct BuiltinOptionsTraits<PowOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_PowOptions; +}; + +template <> struct BuiltinOptionsTraits<ArgMinOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions; +}; + +template <> struct BuiltinOptionsTraits<FakeQuantOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions; +}; + +template <> struct BuiltinOptionsTraits<PackOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_PackOptions; +}; + +template <> struct BuiltinOptionsTraits<LogicalOrOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions; +}; + +template <> struct BuiltinOptionsTraits<OneHotOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions; +}; + 
+template <> struct BuiltinOptionsTraits<LogicalAndOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions; +}; + +template <> struct BuiltinOptionsTraits<LogicalNotOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions; +}; + +template <> struct BuiltinOptionsTraits<UnpackOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions; +}; + +template <> struct BuiltinOptionsTraits<FloorDivOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions; +}; + +template <> struct BuiltinOptionsTraits<SquareOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions; +}; + +template <> struct BuiltinOptionsTraits<ZerosLikeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions; +}; + +template <> struct BuiltinOptionsTraits<FillOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FillOptions; +}; + +template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions; +}; + +template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions; +}; + +template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; +}; + +template <> struct BuiltinOptionsTraits<FloorModOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; +}; + +template <> struct BuiltinOptionsTraits<RangeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions; +}; + +template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions; +}; + +template <> struct 
BuiltinOptionsTraits<LeakyReluOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions; +}; + +template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions; +}; + +template <> struct BuiltinOptionsTraits<MirrorPadOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions; +}; + +template <> struct BuiltinOptionsTraits<AbsOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions; +}; + +template <> struct BuiltinOptionsTraits<SplitVOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions; +}; + +template <> struct BuiltinOptionsTraits<InstanceNormOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_InstanceNormOptions; +}; + +bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); +bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, + const flatbuffers::Vector<flatbuffers::Offset<void>> *values, + const flatbuffers::Vector<uint8_t> *types); + +enum Padding +{ + Padding_SAME = 0, + Padding_VALID = 1, + Padding_MIN = Padding_SAME, + Padding_MAX = Padding_VALID +}; + +inline const Padding (&EnumValuesPadding())[2] +{ + static const Padding values[] = {Padding_SAME, Padding_VALID}; + return values; +} + +inline const char *const *EnumNamesPadding() +{ + static const char *const names[] = {"SAME", "VALID", nullptr}; + return names; +} + +inline const char *EnumNamePadding(Padding e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesPadding()[index]; +} + +enum ActivationFunctionType +{ + ActivationFunctionType_NONE = 0, + ActivationFunctionType_RELU = 1, + ActivationFunctionType_RELU_N1_TO_1 = 2, + ActivationFunctionType_RELU6 = 3, + ActivationFunctionType_TANH = 4, + ActivationFunctionType_SIGN_BIT = 5, + ActivationFunctionType_MIN = ActivationFunctionType_NONE, + 
ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT +}; + +inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] +{ + static const ActivationFunctionType values[] = { + ActivationFunctionType_NONE, ActivationFunctionType_RELU, + ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, + ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + return values; +} + +inline const char *const *EnumNamesActivationFunctionType() +{ + static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6", + "TANH", "SIGN_BIT", nullptr}; + return names; +} + +inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesActivationFunctionType()[index]; +} + +enum LSHProjectionType +{ + LSHProjectionType_UNKNOWN = 0, + LSHProjectionType_SPARSE = 1, + LSHProjectionType_DENSE = 2, + LSHProjectionType_MIN = LSHProjectionType_UNKNOWN, + LSHProjectionType_MAX = LSHProjectionType_DENSE +}; + +inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3] +{ + static const LSHProjectionType values[] = {LSHProjectionType_UNKNOWN, LSHProjectionType_SPARSE, + LSHProjectionType_DENSE}; + return values; +} + +inline const char *const *EnumNamesLSHProjectionType() +{ + static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr}; + return names; +} + +inline const char *EnumNameLSHProjectionType(LSHProjectionType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesLSHProjectionType()[index]; +} + +enum FullyConnectedOptionsWeightsFormat +{ + FullyConnectedOptionsWeightsFormat_DEFAULT = 0, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1, + FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 +}; + +inline const FullyConnectedOptionsWeightsFormat 
(&EnumValuesFullyConnectedOptionsWeightsFormat())[2] +{ + static const FullyConnectedOptionsWeightsFormat values[] = { + FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8}; + return values; +} + +inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat() +{ + static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr}; + return names; +} + +inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesFullyConnectedOptionsWeightsFormat()[index]; +} + +enum LSTMKernelType +{ + LSTMKernelType_FULL = 0, + LSTMKernelType_BASIC = 1, + LSTMKernelType_MIN = LSTMKernelType_FULL, + LSTMKernelType_MAX = LSTMKernelType_BASIC +}; + +inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2] +{ + static const LSTMKernelType values[] = {LSTMKernelType_FULL, LSTMKernelType_BASIC}; + return values; +} + +inline const char *const *EnumNamesLSTMKernelType() +{ + static const char *const names[] = {"FULL", "BASIC", nullptr}; + return names; +} + +inline const char *EnumNameLSTMKernelType(LSTMKernelType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesLSTMKernelType()[index]; +} + +enum CombinerType +{ + CombinerType_SUM = 0, + CombinerType_MEAN = 1, + CombinerType_SQRTN = 2, + CombinerType_MIN = CombinerType_SUM, + CombinerType_MAX = CombinerType_SQRTN +}; + +inline const CombinerType (&EnumValuesCombinerType())[3] +{ + static const CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN, CombinerType_SQRTN}; + return values; +} + +inline const char *const *EnumNamesCombinerType() +{ + static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr}; + return names; +} + +inline const char *EnumNameCombinerType(CombinerType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesCombinerType()[index]; +} + +enum MirrorPadMode +{ + MirrorPadMode_REFLECT = 0, + 
MirrorPadMode_SYMMETRIC = 1, + MirrorPadMode_MIN = MirrorPadMode_REFLECT, + MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC +}; + +inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2] +{ + static const MirrorPadMode values[] = {MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC}; + return values; +} + +inline const char *const *EnumNamesMirrorPadMode() +{ + static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr}; + return names; +} + +inline const char *EnumNameMirrorPadMode(MirrorPadMode e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesMirrorPadMode()[index]; +} + +enum CustomOptionsFormat +{ + CustomOptionsFormat_FLEXBUFFERS = 0, + CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS, + CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS +}; + +inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] +{ + static const CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS}; + return values; +} + +inline const char *const *EnumNamesCustomOptionsFormat() +{ + static const char *const names[] = {"FLEXBUFFERS", nullptr}; + return names; +} + +inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesCustomOptionsFormat()[index]; +} + +enum DataFormat +{ + DataFormat_CHANNELS_LAST = 0, + DataFormat_CHANNELS_FIRST = 1, + DataFormat_MIN = DataFormat_CHANNELS_LAST, + DataFormat_MAX = DataFormat_CHANNELS_FIRST +}; + +inline const DataFormat (&EnumValuesDataFormat())[2] +{ + static const DataFormat values[] = {DataFormat_CHANNELS_LAST, DataFormat_CHANNELS_FIRST}; + return values; +} + +inline const char *const *EnumNamesDataFormat() +{ + static const char *const names[] = {"CHANNELS_LAST", "CHANNELS_FIRST", nullptr}; + return names; +} + +inline const char *EnumNameDataFormat(DataFormat e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesDataFormat()[index]; +} + +struct CustomQuantization FLATBUFFERS_FINAL_CLASS : 
private flatbuffers::Table +{ + enum + { + VT_CUSTOM = 4 + }; + const flatbuffers::Vector<uint8_t> *custom() const + { + return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CUSTOM) && + verifier.VerifyVector(custom()) && verifier.EndTable(); + } +}; + +struct CustomQuantizationBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom) + { + fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom); + } + explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &); + flatbuffers::Offset<CustomQuantization> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CustomQuantization>(end); + return o; + } +}; + +inline flatbuffers::Offset<CustomQuantization> +CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0) +{ + CustomQuantizationBuilder builder_(_fbb); + builder_.add_custom(custom); + return builder_.Finish(); +} + +inline flatbuffers::Offset<CustomQuantization> +CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<uint8_t> *custom = nullptr) +{ + return circle::CreateCustomQuantization(_fbb, custom ? 
_fbb.CreateVector<uint8_t>(*custom) : 0); +} + +struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_MIN = 4, + VT_MAX = 6, + VT_SCALE = 8, + VT_ZERO_POINT = 10, + VT_DETAILS_TYPE = 12, + VT_DETAILS = 14 + }; + const flatbuffers::Vector<float> *min() const + { + return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN); + } + const flatbuffers::Vector<float> *max() const + { + return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX); + } + const flatbuffers::Vector<float> *scale() const + { + return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE); + } + const flatbuffers::Vector<int64_t> *zero_point() const + { + return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT); + } + QuantizationDetails details_type() const + { + return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0)); + } + const void *details() const { return GetPointer<const void *>(VT_DETAILS); } + template <typename T> const T *details_as() const; + const CustomQuantization *details_as_CustomQuantization() const + { + return details_type() == QuantizationDetails_CustomQuantization + ? 
static_cast<const CustomQuantization *>(details()) + : nullptr; + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) && + verifier.VerifyVector(min()) && VerifyOffset(verifier, VT_MAX) && + verifier.VerifyVector(max()) && VerifyOffset(verifier, VT_SCALE) && + verifier.VerifyVector(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) && + verifier.VerifyVector(zero_point()) && VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE) && + VerifyOffset(verifier, VT_DETAILS) && + VerifyQuantizationDetails(verifier, details(), details_type()) && verifier.EndTable(); + } +}; + +template <> +inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const +{ + return details_as_CustomQuantization(); +} + +struct QuantizationParametersBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min) + { + fbb_.AddOffset(QuantizationParameters::VT_MIN, min); + } + void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max) + { + fbb_.AddOffset(QuantizationParameters::VT_MAX, max); + } + void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale) + { + fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale); + } + void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point) + { + fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point); + } + void add_details_type(QuantizationDetails details_type) + { + fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE, + static_cast<uint8_t>(details_type), 0); + } + void add_details(flatbuffers::Offset<void> details) + { + fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details); + } + explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &); + 
flatbuffers::Offset<QuantizationParameters> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<QuantizationParameters>(end); + return o; + } +}; + +inline flatbuffers::Offset<QuantizationParameters> +CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<float>> min = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> max = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0, + flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0, + QuantizationDetails details_type = QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0) +{ + QuantizationParametersBuilder builder_(_fbb); + builder_.add_details(details); + builder_.add_zero_point(zero_point); + builder_.add_scale(scale); + builder_.add_max(max); + builder_.add_min(min); + builder_.add_details_type(details_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect( + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, + const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, + const std::vector<int64_t> *zero_point = nullptr, + QuantizationDetails details_type = QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0) +{ + return circle::CreateQuantizationParameters( + _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0, + scale ? _fbb.CreateVector<float>(*scale) : 0, + zero_point ? 
_fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details); +} + +struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_SHAPE = 4, + VT_TYPE = 6, + VT_BUFFER = 8, + VT_NAME = 10, + VT_QUANTIZATION = 12, + VT_IS_VARIABLE = 14 + }; + const flatbuffers::Vector<int32_t> *shape() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE); + } + TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); } + uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); } + const flatbuffers::String *name() const + { + return GetPointer<const flatbuffers::String *>(VT_NAME); + } + const QuantizationParameters *quantization() const + { + return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION); + } + bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) && + verifier.VerifyVector(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) && + VerifyField<uint32_t>(verifier, VT_BUFFER) && VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && VerifyOffset(verifier, VT_QUANTIZATION) && + verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) && + verifier.EndTable(); + } +}; + +struct TensorBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape) + { + fbb_.AddOffset(Tensor::VT_SHAPE, shape); + } + void add_type(TensorType type) + { + fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0); + } + void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0); } + void add_name(flatbuffers::Offset<flatbuffers::String> name) + { + fbb_.AddOffset(Tensor::VT_NAME, name); + } + void add_quantization(flatbuffers::Offset<QuantizationParameters> 
quantization) + { + fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization); + } + void add_is_variable(bool is_variable) + { + fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0); + } + explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TensorBuilder &operator=(const TensorBuilder &); + flatbuffers::Offset<Tensor> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Tensor>(end); + return o; + } +}; + +inline flatbuffers::Offset<Tensor> +CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0, + TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, + flatbuffers::Offset<flatbuffers::String> name = 0, + flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false) +{ + TensorBuilder builder_(_fbb); + builder_.add_quantization(quantization); + builder_.add_name(name); + builder_.add_buffer(buffer); + builder_.add_shape(shape); + builder_.add_is_variable(is_variable); + builder_.add_type(type); + return builder_.Finish(); +} + +inline flatbuffers::Offset<Tensor> CreateTensorDirect( + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, + TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, + flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false) +{ + return circle::CreateTensor(_fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer, + name ? 
_fbb.CreateString(name) : 0, quantization, is_variable); +} + +struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FUSED_ACTIVATION_FUNCTION = 10, + VT_DILATION_W_FACTOR = 12, + VT_DILATION_H_FACTOR = 14 + }; + Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } + int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) && + VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable(); + } +}; + +struct Conv2DOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) + { + fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, + 
static_cast<int8_t>(fused_activation_function), 0); + } + void add_dilation_w_factor(int32_t dilation_w_factor) + { + fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) + { + fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &); + flatbuffers::Offset<Conv2DOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Conv2DOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<Conv2DOptions> +CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) +{ + Conv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FILTER_WIDTH = 10, + VT_FILTER_HEIGHT = 12, + VT_FUSED_ACTIVATION_FUNCTION = 14 + }; + Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); } + int32_t filter_height() const { return 
GetField<int32_t>(VT_FILTER_HEIGHT, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && + VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) && + VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct Pool2DOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) + { + fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_filter_width(int32_t filter_width) + { + fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0); + } + void add_filter_height(int32_t filter_height) + { + fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &); + flatbuffers::Offset<Pool2DOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Pool2DOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<Pool2DOptions> 
+CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, + int32_t filter_height = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + Pool2DOptionsBuilder builder_(_fbb); + builder_.add_filter_height(filter_height); + builder_.add_filter_width(filter_width); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_DEPTH_MULTIPLIER = 10, + VT_FUSED_ACTIVATION_FUNCTION = 12, + VT_DILATION_W_FACTOR = 14, + VT_DILATION_H_FACTOR = 16 + }; + Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } + int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && + VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) && + 
VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable(); + } +}; + +struct DepthwiseConv2DOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) + { + fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_depth_multiplier(int32_t depth_multiplier) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_dilation_w_factor(int32_t dilation_w_factor) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &); + flatbuffers::Offset<DepthwiseConv2DOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, + int32_t stride_h = 0, int32_t depth_multiplier = 0, + ActivationFunctionType fused_activation_function = 
ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) +{ + DepthwiseConv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_depth_multiplier(depth_multiplier); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NUM_CHANNELS = 4, + VT_NUM_COLUMNS_PER_CHANNEL = 6, + VT_EMBEDDING_DIM_PER_CHANNEL = 8 + }; + int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); } + const flatbuffers::Vector<int32_t> *num_columns_per_channel() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL); + } + const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) && + VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) && + verifier.VerifyVector(num_columns_per_channel()) && + VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) && + verifier.VerifyVector(embedding_dim_per_channel()) && verifier.EndTable(); + } +}; + +struct ConcatEmbeddingsOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_channels(int32_t num_channels) + { + fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); + } + void add_num_columns_per_channel( + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) + { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); + } + void 
add_embedding_dim_per_channel( + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) + { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, + embedding_dim_per_channel); + } + explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &); + flatbuffers::Offset<ConcatEmbeddingsOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions( + flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) +{ + ConcatEmbeddingsOptionsBuilder builder_(_fbb); + builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); + builder_.add_num_columns_per_channel(num_columns_per_channel); + builder_.add_num_channels(num_channels); + return builder_.Finish(); +} + +inline flatbuffers::Offset<ConcatEmbeddingsOptions> +CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, + const std::vector<int32_t> *num_columns_per_channel = nullptr, + const std::vector<int32_t> *embedding_dim_per_channel = nullptr) +{ + return circle::CreateConcatEmbeddingsOptions( + _fbb, num_channels, + num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0, + embedding_dim_per_channel ? 
_fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0); +} + +struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_TYPE = 4 + }; + LSHProjectionType type() const + { + return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_TYPE) && + verifier.EndTable(); + } +}; + +struct LSHProjectionOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_type(LSHProjectionType type) + { + fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0); + } + explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &); + flatbuffers::Offset<LSHProjectionOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LSHProjectionOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LSHProjectionOptions> +CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, + LSHProjectionType type = LSHProjectionType_UNKNOWN) +{ + LSHProjectionOptionsBuilder builder_(_fbb); + builder_.add_type(type); + return builder_.Finish(); +} + +struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_RANK = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RANK) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct 
SVDFOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &); + flatbuffers::Offset<SVDFOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SVDFOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SVDFOptions> +CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + SVDFOptionsBuilder builder_(_fbb); + builder_.add_rank(rank); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct RNNOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + 
start_ = fbb_.StartTable(); + } + RNNOptionsBuilder &operator=(const RNNOptionsBuilder &); + flatbuffers::Offset<RNNOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<RNNOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<RNNOptions> +CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + RNNOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct SequenceRNNOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_time_major(bool time_major) + { + fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), + 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &); + flatbuffers::Offset<SequenceRNNOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o 
= flatbuffers::Offset<SequenceRNNOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + SequenceRNNOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_time_major(time_major); + return builder_.Finish(); +} + +struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6, + VT_MERGE_OUTPUTS = 8 + }; + bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) && verifier.EndTable(); + } +}; + +struct BidirectionalSequenceRNNOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_time_major(bool time_major) + { + fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, + static_cast<uint8_t>(time_major), 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_merge_outputs(bool merge_outputs) + { + fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS, + static_cast<uint8_t>(merge_outputs), 0); + } + 
explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &); + flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + bool merge_outputs = false) +{ + BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); + builder_.add_merge_outputs(merge_outputs); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_time_major(time_major); + return builder_.Finish(); +} + +struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_WEIGHTS_FORMAT = 6 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + FullyConnectedOptionsWeightsFormat weights_format() const + { + return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT) && verifier.EndTable(); + } +}; + +struct FullyConnectedOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, + 
static_cast<int8_t>(fused_activation_function), 0); + } + void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format) + { + fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT, + static_cast<int8_t>(weights_format), 0); + } + explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &); + flatbuffers::Offset<FullyConnectedOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<FullyConnectedOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT) +{ + FullyConnectedOptionsBuilder builder_(_fbb); + builder_.add_weights_format(weights_format); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_BETA = 4 + }; + float beta() const { return GetField<float>(VT_BETA, 0.0f); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_BETA) && + verifier.EndTable(); + } +}; + +struct SoftmaxOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); } + explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &); + flatbuffers::Offset<SoftmaxOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = 
flatbuffers::Offset<SoftmaxOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SoftmaxOptions> +CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f) +{ + SoftmaxOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + return builder_.Finish(); +} + +struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_AXIS = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct ConcatenationOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &); + flatbuffers::Offset<ConcatenationOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ConcatenationOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions( + flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + ConcatenationOptionsBuilder builder_(_fbb); + 
builder_.add_axis(axis); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct AddOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + AddOptionsBuilder &operator=(const AddOptionsBuilder &); + flatbuffers::Offset<AddOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<AddOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<AddOptions> +CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + AddOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + 
VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct MulOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + MulOptionsBuilder &operator=(const MulOptionsBuilder &); + flatbuffers::Offset<MulOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<MulOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<MulOptions> +CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + MulOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct L2NormOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } 
+ L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &); + flatbuffers::Offset<L2NormOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<L2NormOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<L2NormOptions> +CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + L2NormOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_RADIUS = 4, + VT_BIAS = 6, + VT_ALPHA = 8, + VT_BETA = 10 + }; + int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); } + float bias() const { return GetField<float>(VT_BIAS, 0.0f); } + float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); } + float beta() const { return GetField<float>(VT_BETA, 0.0f); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RADIUS) && + VerifyField<float>(verifier, VT_BIAS) && VerifyField<float>(verifier, VT_ALPHA) && + VerifyField<float>(verifier, VT_BETA) && verifier.EndTable(); + } +}; + +struct LocalResponseNormalizationOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_radius(int32_t radius) + { + fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0); + } + void add_bias(float bias) + { + fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f); + } + void add_alpha(float alpha) + { + fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f); + } + void add_beta(float beta) + { + fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); + } + explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) 
+ : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LocalResponseNormalizationOptionsBuilder & + operator=(const LocalResponseNormalizationOptionsBuilder &); + flatbuffers::Offset<LocalResponseNormalizationOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LocalResponseNormalizationOptions> +CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t radius = 0, + float bias = 0.0f, float alpha = 0.0f, float beta = 0.0f) +{ + LocalResponseNormalizationOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + builder_.add_alpha(alpha); + builder_.add_bias(bias); + builder_.add_radius(radius); + return builder_.Finish(); +} + +struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_KERNEL_TYPE = 10 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } + float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } + LSTMKernelType kernel_type() const + { + return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<float>(verifier, VT_CELL_CLIP) && + VerifyField<float>(verifier, VT_PROJ_CLIP) && + VerifyField<int8_t>(verifier, VT_KERNEL_TYPE) && verifier.EndTable(); + } +}; + +struct LSTMOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + 
fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) + { + fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) + { + fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_kernel_type(LSTMKernelType kernel_type) + { + fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0); + } + explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &); + flatbuffers::Offset<LSTMOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LSTMOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LSTMOptions> +CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, + LSTMKernelType kernel_type = LSTMKernelType_FULL) +{ + LSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_kernel_type(kernel_type); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_TIME_MAJOR = 10 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } + float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } + bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; } + bool 
Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<float>(verifier, VT_CELL_CLIP) && + VerifyField<float>(verifier, VT_PROJ_CLIP) && + VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) && verifier.EndTable(); + } +}; + +struct UnidirectionalSequenceLSTMOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) + { + fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) + { + fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_time_major(bool time_major) + { + fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, + static_cast<uint8_t>(time_major), 0); + } + explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + UnidirectionalSequenceLSTMOptionsBuilder & + operator=(const UnidirectionalSequenceLSTMOptionsBuilder &); + flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> +CreateUnidirectionalSequenceLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false) +{ + UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + 
builder_.add_cell_clip(cell_clip); + builder_.add_time_major(time_major); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_MERGE_OUTPUTS = 10 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } + float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } + bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<float>(verifier, VT_CELL_CLIP) && + VerifyField<float>(verifier, VT_PROJ_CLIP) && + VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) && verifier.EndTable(); + } +}; + +struct BidirectionalSequenceLSTMOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) + { + fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) + { + fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_merge_outputs(bool merge_outputs) + { + fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, + static_cast<uint8_t>(merge_outputs), 0); + } + explicit 
BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + BidirectionalSequenceLSTMOptionsBuilder & + operator=(const BidirectionalSequenceLSTMOptionsBuilder &); + flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false) +{ + BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_merge_outputs(merge_outputs); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_ALIGN_CORNERS = 8 + }; + bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) && + verifier.EndTable(); + } +}; + +struct ResizeBilinearOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_align_corners(bool align_corners) + { + fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS, + static_cast<uint8_t>(align_corners), 0); + } + explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &); + flatbuffers::Offset<ResizeBilinearOptions> Finish() + { + const auto end = 
fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ResizeBilinearOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ResizeBilinearOptions> +CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false) +{ + ResizeBilinearOptionsBuilder builder_(_fbb); + builder_.add_align_corners(align_corners); + return builder_.Finish(); +} + +struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_ALIGN_CORNERS = 4 + }; + bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) && + verifier.EndTable(); + } +}; + +struct ResizeNearestNeighborOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_align_corners(bool align_corners) + { + fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS, + static_cast<uint8_t>(align_corners), 0); + } + explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &); + flatbuffers::Offset<ResizeNearestNeighborOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ResizeNearestNeighborOptions> +CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false) +{ + ResizeNearestNeighborOptionsBuilder builder_(_fbb); + builder_.add_align_corners(align_corners); + return builder_.Finish(); +} + +struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_SUBGRAPH = 4 + }; + uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); } + bool 
Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_SUBGRAPH) && + verifier.EndTable(); + } +}; + +struct CallOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_subgraph(uint32_t subgraph) + { + fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0); + } + explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + CallOptionsBuilder &operator=(const CallOptionsBuilder &); + flatbuffers::Offset<CallOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CallOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, + uint32_t subgraph = 0) +{ + CallOptionsBuilder builder_(_fbb); + builder_.add_subgraph(subgraph); + return builder_.Finish(); +} + +struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct PadOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + PadOptionsBuilder &operator=(const PadOptionsBuilder &); + flatbuffers::Offset<PadOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<PadOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + PadOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct PadV2OptionsBuilder 
+{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &); + flatbuffers::Offset<PadV2Options> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<PadV2Options>(end); + return o; + } +}; + +inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb) +{ + PadV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NEW_SHAPE = 4 + }; + const flatbuffers::Vector<int32_t> *new_shape() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) && + verifier.VerifyVector(new_shape()) && verifier.EndTable(); + } +}; + +struct ReshapeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape) + { + fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape); + } + explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &); + flatbuffers::Offset<ReshapeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ReshapeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ReshapeOptions> +CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0) +{ + ReshapeOptionsBuilder builder_(_fbb); + builder_.add_new_shape(new_shape); + return builder_.Finish(); +} + +inline flatbuffers::Offset<ReshapeOptions> 
+CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<int32_t> *new_shape = nullptr) +{ + return circle::CreateReshapeOptions(_fbb, new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0); +} + +struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct SpaceToBatchNDOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &); + flatbuffers::Offset<SpaceToBatchNDOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SpaceToBatchNDOptions> +CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + SpaceToBatchNDOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct BatchToSpaceNDOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &); + flatbuffers::Offset<BatchToSpaceNDOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<BatchToSpaceNDOptions> +CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + 
BatchToSpaceNDOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NGRAM_SIZE = 4, + VT_MAX_SKIP_SIZE = 6, + VT_INCLUDE_ALL_NGRAMS = 8 + }; + int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); } + int32_t max_skip_size() const { return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0); } + bool include_all_ngrams() const { return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) && + VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) && + VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) && verifier.EndTable(); + } +}; + +struct SkipGramOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_ngram_size(int32_t ngram_size) + { + fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0); + } + void add_max_skip_size(int32_t max_skip_size) + { + fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0); + } + void add_include_all_ngrams(bool include_all_ngrams) + { + fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, + static_cast<uint8_t>(include_all_ngrams), 0); + } + explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &); + flatbuffers::Offset<SkipGramOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SkipGramOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SkipGramOptions> +CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0, + int32_t max_skip_size = 0, bool include_all_ngrams = false) +{ + SkipGramOptionsBuilder builder_(_fbb); + builder_.add_max_skip_size(max_skip_size); + 
builder_.add_ngram_size(ngram_size); + builder_.add_include_all_ngrams(include_all_ngrams); + return builder_.Finish(); +} + +struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_BLOCK_SIZE = 4 + }; + int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) && + verifier.EndTable(); + } +}; + +struct SpaceToDepthOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_block_size(int32_t block_size) + { + fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0); + } + explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &); + flatbuffers::Offset<SpaceToDepthOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SpaceToDepthOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SpaceToDepthOptions> +CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0) +{ + SpaceToDepthOptionsBuilder builder_(_fbb); + builder_.add_block_size(block_size); + return builder_.Finish(); +} + +struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct SubOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void 
add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SubOptionsBuilder &operator=(const SubOptionsBuilder &); + flatbuffers::Offset<SubOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SubOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SubOptions> +CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + SubOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct DivOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + DivOptionsBuilder &operator=(const DivOptionsBuilder &); + flatbuffers::Offset<DivOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<DivOptions>(end); + return o; + } 
+}; + +inline flatbuffers::Offset<DivOptions> +CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + DivOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct TopKV2OptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &); + flatbuffers::Offset<TopKV2Options> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TopKV2Options>(end); + return o; + } +}; + +inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb) +{ + TopKV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_COMBINER = 4 + }; + CombinerType combiner() const + { + return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_COMBINER) && + verifier.EndTable(); + } +}; + +struct EmbeddingLookupSparseOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_combiner(CombinerType combiner) + { + fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER, + static_cast<int8_t>(combiner), 0); + } + explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + 
EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &); + flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<EmbeddingLookupSparseOptions> +CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, + CombinerType combiner = CombinerType_SUM) +{ + EmbeddingLookupSparseOptionsBuilder builder_(_fbb); + builder_.add_combiner(combiner); + return builder_.Finish(); +} + +struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_AXIS = 4 + }; + int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) && + verifier.EndTable(); + } +}; + +struct GatherOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); } + explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + GatherOptionsBuilder &operator=(const GatherOptionsBuilder &); + flatbuffers::Offset<GatherOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<GatherOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0) +{ + GatherOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + return builder_.Finish(); +} + +struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct TransposeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + 
flatbuffers::uoffset_t start_; + explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &); + flatbuffers::Offset<TransposeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TransposeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<TransposeOptions> +CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + TransposeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ExpOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ExpOptionsBuilder &operator=(const ExpOptionsBuilder &); + flatbuffers::Offset<ExpOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ExpOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + ExpOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_KEEP_DIMS = 4 + }; + bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) && + verifier.EndTable(); + } +}; + +struct ReducerOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_keep_dims(bool keep_dims) + { + fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 
0); + } + explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &); + flatbuffers::Offset<ReducerOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ReducerOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ReducerOptions> +CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false) +{ + ReducerOptionsBuilder builder_(_fbb); + builder_.add_keep_dims(keep_dims); + return builder_.Finish(); +} + +struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_SQUEEZE_DIMS = 4 + }; + const flatbuffers::Vector<int32_t> *squeeze_dims() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SQUEEZE_DIMS) && + verifier.VerifyVector(squeeze_dims()) && verifier.EndTable(); + } +}; + +struct SqueezeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims) + { + fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims); + } + explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &); + flatbuffers::Offset<SqueezeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SqueezeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SqueezeOptions> +CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0) +{ + SqueezeOptionsBuilder builder_(_fbb); + builder_.add_squeeze_dims(squeeze_dims); + return builder_.Finish(); +} + 
+inline flatbuffers::Offset<SqueezeOptions> +CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<int32_t> *squeeze_dims = nullptr) +{ + return circle::CreateSqueezeOptions(_fbb, + squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0); +} + +struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) && + verifier.EndTable(); + } +}; + +struct SplitOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) + { + fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SplitOptionsBuilder &operator=(const SplitOptionsBuilder &); + flatbuffers::Offset<SplitOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SplitOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) +{ + SplitOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) && + verifier.EndTable(); + } +}; + +struct SplitVOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) + { + 
fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &); + flatbuffers::Offset<SplitVOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SplitVOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) +{ + SplitVOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_BEGIN_MASK = 4, + VT_END_MASK = 6, + VT_ELLIPSIS_MASK = 8, + VT_NEW_AXIS_MASK = 10, + VT_SHRINK_AXIS_MASK = 12 + }; + int32_t begin_mask() const { return GetField<int32_t>(VT_BEGIN_MASK, 0); } + int32_t end_mask() const { return GetField<int32_t>(VT_END_MASK, 0); } + int32_t ellipsis_mask() const { return GetField<int32_t>(VT_ELLIPSIS_MASK, 0); } + int32_t new_axis_mask() const { return GetField<int32_t>(VT_NEW_AXIS_MASK, 0); } + int32_t shrink_axis_mask() const { return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BEGIN_MASK) && + VerifyField<int32_t>(verifier, VT_END_MASK) && + VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK) && + VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK) && + VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK) && verifier.EndTable(); + } +}; + +struct StridedSliceOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_begin_mask(int32_t begin_mask) + { + fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0); + } + void add_end_mask(int32_t end_mask) + { + 
fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0); + } + void add_ellipsis_mask(int32_t ellipsis_mask) + { + fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0); + } + void add_new_axis_mask(int32_t new_axis_mask) + { + fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0); + } + void add_shrink_axis_mask(int32_t shrink_axis_mask) + { + fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0); + } + explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &); + flatbuffers::Offset<StridedSliceOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<StridedSliceOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<StridedSliceOptions> +CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_mask = 0, + int32_t end_mask = 0, int32_t ellipsis_mask = 0, + int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0) +{ + StridedSliceOptionsBuilder builder_(_fbb); + builder_.add_shrink_axis_mask(shrink_axis_mask); + builder_.add_new_axis_mask(new_axis_mask); + builder_.add_ellipsis_mask(ellipsis_mask); + builder_.add_end_mask(end_mask); + builder_.add_begin_mask(begin_mask); + return builder_.Finish(); +} + +struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LogSoftmaxOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &); + flatbuffers::Offset<LogSoftmaxOptions> 
Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LogSoftmaxOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LogSoftmaxOptions> +CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LogSoftmaxOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_IN_DATA_TYPE = 4, + VT_OUT_DATA_TYPE = 6 + }; + TensorType in_data_type() const + { + return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0)); + } + TensorType out_data_type() const + { + return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE) && + VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE) && verifier.EndTable(); + } +}; + +struct CastOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_in_data_type(TensorType in_data_type) + { + fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0); + } + void add_out_data_type(TensorType out_data_type) + { + fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0); + } + explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + CastOptionsBuilder &operator=(const CastOptionsBuilder &); + flatbuffers::Offset<CastOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CastOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<CastOptions> +CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, + TensorType in_data_type = TensorType_FLOAT32, + TensorType out_data_type = TensorType_FLOAT32) +{ + CastOptionsBuilder builder_(_fbb); + builder_.add_out_data_type(out_data_type); + builder_.add_in_data_type(in_data_type); + 
return builder_.Finish(); +} + +struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct DequantizeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &); + flatbuffers::Offset<DequantizeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<DequantizeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<DequantizeOptions> +CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + DequantizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct MaximumMinimumOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &); + flatbuffers::Offset<MaximumMinimumOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<MaximumMinimumOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<MaximumMinimumOptions> +CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + MaximumMinimumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) 
&& verifier.EndTable(); + } +}; + +struct TileOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TileOptionsBuilder &operator=(const TileOptionsBuilder &); + flatbuffers::Offset<TileOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TileOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + TileOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_OUTPUT_TYPE = 4 + }; + TensorType output_type() const + { + return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) && + verifier.EndTable(); + } +}; + +struct ArgMaxOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_output_type(TensorType output_type) + { + fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0); + } + explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &); + flatbuffers::Offset<ArgMaxOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ArgMaxOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ArgMaxOptions> +CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, + TensorType output_type = TensorType_FLOAT32) +{ + ArgMaxOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private 
flatbuffers::Table +{ + enum + { + VT_OUTPUT_TYPE = 4 + }; + TensorType output_type() const + { + return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) && + verifier.EndTable(); + } +}; + +struct ArgMinOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_output_type(TensorType output_type) + { + fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0); + } + explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &); + flatbuffers::Offset<ArgMinOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ArgMinOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ArgMinOptions> +CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, + TensorType output_type = TensorType_FLOAT32) +{ + ArgMinOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct GreaterOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &); + flatbuffers::Offset<GreaterOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<GreaterOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<GreaterOptions> +CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + 
GreaterOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct GreaterEqualOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &); + flatbuffers::Offset<GreaterEqualOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<GreaterEqualOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<GreaterEqualOptions> +CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + GreaterEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LessOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LessOptionsBuilder &operator=(const LessOptionsBuilder &); + flatbuffers::Offset<LessOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LessOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LessOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + 
+struct LessEqualOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &); + flatbuffers::Offset<LessEqualOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LessEqualOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LessEqualOptions> +CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LessEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct NegOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + NegOptionsBuilder &operator=(const NegOptionsBuilder &); + flatbuffers::Offset<NegOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<NegOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + NegOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct SelectOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SelectOptionsBuilder &operator=(const SelectOptionsBuilder &); + 
flatbuffers::Offset<SelectOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SelectOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + SelectOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct SliceOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SliceOptionsBuilder &operator=(const SliceOptionsBuilder &); + flatbuffers::Offset<SliceOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SliceOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + SliceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8 + }; + Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && verifier.EndTable(); + } +}; + +struct TransposeConvOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) + { + 
fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0); + } + explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &); + flatbuffers::Offset<TransposeConvOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TransposeConvOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<TransposeConvOptions> +CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0) +{ + TransposeConvOptionsBuilder builder_(_fbb); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ExpandDimsOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &); + flatbuffers::Offset<ExpandDimsOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ExpandDimsOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ExpandDimsOptions> +CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + ExpandDimsOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct 
SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_VALIDATE_INDICES = 4 + }; + bool validate_indices() const { return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES) && + verifier.EndTable(); + } +}; + +struct SparseToDenseOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_validate_indices(bool validate_indices) + { + fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES, + static_cast<uint8_t>(validate_indices), 0); + } + explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &); + flatbuffers::Offset<SparseToDenseOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SparseToDenseOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SparseToDenseOptions> +CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_indices = false) +{ + SparseToDenseOptionsBuilder builder_(_fbb); + builder_.add_validate_indices(validate_indices); + return builder_.Finish(); +} + +struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct EqualOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + EqualOptionsBuilder &operator=(const EqualOptionsBuilder &); + flatbuffers::Offset<EqualOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<EqualOptions>(end); + return o; + } 
+}; + +inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + EqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct NotEqualOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &); + flatbuffers::Offset<NotEqualOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<NotEqualOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<NotEqualOptions> +CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + NotEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_OUT_TYPE = 4 + }; + TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) && + verifier.EndTable(); + } +}; + +struct ShapeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_out_type(TensorType out_type) + { + fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0); + } + explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &); + flatbuffers::Offset<ShapeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ShapeOptions>(end); + return 
o; + } +}; + +inline flatbuffers::Offset<ShapeOptions> +CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32) +{ + ShapeOptionsBuilder builder_(_fbb); + builder_.add_out_type(out_type); + return builder_.Finish(); +} + +struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct PowOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + PowOptionsBuilder &operator=(const PowOptionsBuilder &); + flatbuffers::Offset<PowOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<PowOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + PowOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_MIN = 4, + VT_MAX = 6, + VT_NUM_BITS = 8, + VT_NARROW_RANGE = 10 + }; + float min() const { return GetField<float>(VT_MIN, 0.0f); } + float max() const { return GetField<float>(VT_MAX, 0.0f); } + int32_t num_bits() const { return GetField<int32_t>(VT_NUM_BITS, 0); } + bool narrow_range() const { return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_MIN) && + VerifyField<float>(verifier, VT_MAX) && VerifyField<int32_t>(verifier, VT_NUM_BITS) && + VerifyField<uint8_t>(verifier, VT_NARROW_RANGE) && verifier.EndTable(); + } +}; + +struct FakeQuantOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_min(float min) { 
fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); } + void add_max(float max) { fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f); } + void add_num_bits(int32_t num_bits) + { + fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0); + } + void add_narrow_range(bool narrow_range) + { + fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range), + 0); + } + explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &); + flatbuffers::Offset<FakeQuantOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<FakeQuantOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<FakeQuantOptions> +CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, float max = 0.0f, + int32_t num_bits = 0, bool narrow_range = false) +{ + FakeQuantOptionsBuilder builder_(_fbb); + builder_.add_num_bits(num_bits); + builder_.add_max(max); + builder_.add_min(min); + builder_.add_narrow_range(narrow_range); + return builder_.Finish(); +} + +struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_VALUES_COUNT = 4, + VT_AXIS = 6 + }; + int32_t values_count() const { return GetField<int32_t>(VT_VALUES_COUNT, 0); } + int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_VALUES_COUNT) && + VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable(); + } +}; + +struct PackOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_values_count(int32_t values_count) + { + fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0); + } + void add_axis(int32_t axis) { 
fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0); } + explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + PackOptionsBuilder &operator=(const PackOptionsBuilder &); + flatbuffers::Offset<PackOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<PackOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<PackOptions> +CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0, int32_t axis = 0) +{ + PackOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_values_count(values_count); + return builder_.Finish(); +} + +struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LogicalOrOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &); + flatbuffers::Offset<LogicalOrOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LogicalOrOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LogicalOrOptions> +CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LogicalOrOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_AXIS = 4 + }; + int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) && + verifier.EndTable(); + } +}; + +struct OneHotOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t 
start_; + void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); } + explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &); + flatbuffers::Offset<OneHotOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<OneHotOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0) +{ + OneHotOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + return builder_.Finish(); +} + +struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct AbsOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + AbsOptionsBuilder &operator=(const AbsOptionsBuilder &); + flatbuffers::Offset<AbsOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<AbsOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + AbsOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LogicalAndOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &); 
+ flatbuffers::Offset<LogicalAndOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LogicalAndOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LogicalAndOptions> +CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LogicalAndOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LogicalNotOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &); + flatbuffers::Offset<LogicalNotOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LogicalNotOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LogicalNotOptions> +CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LogicalNotOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NUM = 4, + VT_AXIS = 6 + }; + int32_t num() const { return GetField<int32_t>(VT_NUM, 0); } + int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM) && + VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable(); + } +}; + +struct UnpackOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); } + void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0); } + 
explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &); + flatbuffers::Offset<UnpackOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<UnpackOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t num = 0, int32_t axis = 0) +{ + UnpackOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_num(num); + return builder_.Finish(); +} + +struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct FloorDivOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &); + flatbuffers::Offset<FloorDivOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<FloorDivOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<FloorDivOptions> +CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + FloorDivOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct SquareOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SquareOptionsBuilder &operator=(const SquareOptionsBuilder &); + 
flatbuffers::Offset<SquareOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SquareOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + SquareOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ZerosLikeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &); + flatbuffers::Offset<ZerosLikeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ZerosLikeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ZerosLikeOptions> +CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + ZerosLikeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct FillOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + FillOptionsBuilder &operator=(const FillOptionsBuilder &); + flatbuffers::Offset<FillOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<FillOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + FillOptionsBuilder builder_(_fbb); + 
return builder_.Finish(); +} + +struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct FloorModOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &); + flatbuffers::Offset<FloorModOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<FloorModOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<FloorModOptions> +CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + FloorModOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct RangeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + RangeOptionsBuilder &operator=(const RangeOptionsBuilder &); + flatbuffers::Offset<RangeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<RangeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + RangeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_ALPHA = 4 + }; + float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && 
VerifyField<float>(verifier, VT_ALPHA) && + verifier.EndTable(); + } +}; + +struct LeakyReluOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); } + explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &); + flatbuffers::Offset<LeakyReluOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LeakyReluOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LeakyReluOptions> +CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f) +{ + LeakyReluOptionsBuilder builder_(_fbb); + builder_.add_alpha(alpha); + return builder_.Finish(); +} + +struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct SquaredDifferenceOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &); + flatbuffers::Offset<SquaredDifferenceOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SquaredDifferenceOptions> +CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + SquaredDifferenceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_MODE = 4 + }; + MirrorPadMode mode() const { return 
static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) && + verifier.EndTable(); + } +}; + +struct MirrorPadOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_mode(MirrorPadMode mode) + { + fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0); + } + explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &); + flatbuffers::Offset<MirrorPadOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<MirrorPadOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<MirrorPadOptions> +CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, + MirrorPadMode mode = MirrorPadMode_REFLECT) +{ + MirrorPadOptionsBuilder builder_(_fbb); + builder_.add_mode(mode); + return builder_.Finish(); +} + +struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_EPSILON = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + float epsilon() const { return GetField<float>(VT_EPSILON, 0.0f); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_EPSILON) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct InstanceNormOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_epsilon(float epsilon) + { + fbb_.AddElement<float>(InstanceNormOptions::VT_EPSILON, epsilon, 0.0f); + } + void 
add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(InstanceNormOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit InstanceNormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + InstanceNormOptionsBuilder &operator=(const InstanceNormOptionsBuilder &); + flatbuffers::Offset<InstanceNormOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<InstanceNormOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions( + flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + InstanceNormOptionsBuilder builder_(_fbb); + builder_.add_epsilon(epsilon); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_BUILTIN_CODE = 4, + VT_CUSTOM_CODE = 6, + VT_VERSION = 8 + }; + BuiltinOperator builtin_code() const + { + return static_cast<BuiltinOperator>(GetField<uint8_t>(VT_BUILTIN_CODE, 0)); + } + const flatbuffers::String *custom_code() const + { + return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE); + } + int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_BUILTIN_CODE) && + VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) && + VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable(); + } +}; + +struct OperatorCodeBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_builtin_code(BuiltinOperator builtin_code) + { + 
fbb_.AddElement<uint8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<uint8_t>(builtin_code), 0); + } + void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code) + { + fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code); + } + void add_version(int32_t version) + { + fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1); + } + explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + OperatorCodeBuilder &operator=(const OperatorCodeBuilder &); + flatbuffers::Offset<OperatorCode> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<OperatorCode>(end); + return o; + } +}; + +inline flatbuffers::Offset<OperatorCode> +CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, + BuiltinOperator builtin_code = BuiltinOperator_ADD, + flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1) +{ + OperatorCodeBuilder builder_(_fbb); + builder_.add_version(version); + builder_.add_custom_code(custom_code); + builder_.add_builtin_code(builtin_code); + return builder_.Finish(); +} + +inline flatbuffers::Offset<OperatorCode> +CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, + BuiltinOperator builtin_code = BuiltinOperator_ADD, + const char *custom_code = nullptr, int32_t version = 1) +{ + return circle::CreateOperatorCode(_fbb, builtin_code, + custom_code ? 
_fbb.CreateString(custom_code) : 0, version); +} + +struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_OPCODE_INDEX = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_BUILTIN_OPTIONS_TYPE = 10, + VT_BUILTIN_OPTIONS = 12, + VT_CUSTOM_OPTIONS = 14, + VT_CUSTOM_OPTIONS_FORMAT = 16, + VT_MUTATING_VARIABLE_INPUTS = 18 + }; + uint32_t opcode_index() const { return GetField<uint32_t>(VT_OPCODE_INDEX, 0); } + const flatbuffers::Vector<int32_t> *inputs() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS); + } + const flatbuffers::Vector<int32_t> *outputs() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS); + } + BuiltinOptions builtin_options_type() const + { + return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0)); + } + const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); } + template <typename T> const T *builtin_options_as() const; + const Conv2DOptions *builtin_options_as_Conv2DOptions() const + { + return builtin_options_type() == BuiltinOptions_Conv2DOptions + ? static_cast<const Conv2DOptions *>(builtin_options()) + : nullptr; + } + const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const + { + return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions + ? static_cast<const DepthwiseConv2DOptions *>(builtin_options()) + : nullptr; + } + const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const + { + return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions + ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options()) + : nullptr; + } + const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const + { + return builtin_options_type() == BuiltinOptions_LSHProjectionOptions + ? 
static_cast<const LSHProjectionOptions *>(builtin_options()) + : nullptr; + } + const Pool2DOptions *builtin_options_as_Pool2DOptions() const + { + return builtin_options_type() == BuiltinOptions_Pool2DOptions + ? static_cast<const Pool2DOptions *>(builtin_options()) + : nullptr; + } + const SVDFOptions *builtin_options_as_SVDFOptions() const + { + return builtin_options_type() == BuiltinOptions_SVDFOptions + ? static_cast<const SVDFOptions *>(builtin_options()) + : nullptr; + } + const RNNOptions *builtin_options_as_RNNOptions() const + { + return builtin_options_type() == BuiltinOptions_RNNOptions + ? static_cast<const RNNOptions *>(builtin_options()) + : nullptr; + } + const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const + { + return builtin_options_type() == BuiltinOptions_FullyConnectedOptions + ? static_cast<const FullyConnectedOptions *>(builtin_options()) + : nullptr; + } + const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const + { + return builtin_options_type() == BuiltinOptions_SoftmaxOptions + ? static_cast<const SoftmaxOptions *>(builtin_options()) + : nullptr; + } + const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const + { + return builtin_options_type() == BuiltinOptions_ConcatenationOptions + ? static_cast<const ConcatenationOptions *>(builtin_options()) + : nullptr; + } + const AddOptions *builtin_options_as_AddOptions() const + { + return builtin_options_type() == BuiltinOptions_AddOptions + ? static_cast<const AddOptions *>(builtin_options()) + : nullptr; + } + const L2NormOptions *builtin_options_as_L2NormOptions() const + { + return builtin_options_type() == BuiltinOptions_L2NormOptions + ? static_cast<const L2NormOptions *>(builtin_options()) + : nullptr; + } + const LocalResponseNormalizationOptions * + builtin_options_as_LocalResponseNormalizationOptions() const + { + return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions + ? 
static_cast<const LocalResponseNormalizationOptions *>(builtin_options()) + : nullptr; + } + const LSTMOptions *builtin_options_as_LSTMOptions() const + { + return builtin_options_type() == BuiltinOptions_LSTMOptions + ? static_cast<const LSTMOptions *>(builtin_options()) + : nullptr; + } + const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const + { + return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions + ? static_cast<const ResizeBilinearOptions *>(builtin_options()) + : nullptr; + } + const CallOptions *builtin_options_as_CallOptions() const + { + return builtin_options_type() == BuiltinOptions_CallOptions + ? static_cast<const CallOptions *>(builtin_options()) + : nullptr; + } + const ReshapeOptions *builtin_options_as_ReshapeOptions() const + { + return builtin_options_type() == BuiltinOptions_ReshapeOptions + ? static_cast<const ReshapeOptions *>(builtin_options()) + : nullptr; + } + const SkipGramOptions *builtin_options_as_SkipGramOptions() const + { + return builtin_options_type() == BuiltinOptions_SkipGramOptions + ? static_cast<const SkipGramOptions *>(builtin_options()) + : nullptr; + } + const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const + { + return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions + ? static_cast<const SpaceToDepthOptions *>(builtin_options()) + : nullptr; + } + const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const + { + return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions + ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options()) + : nullptr; + } + const MulOptions *builtin_options_as_MulOptions() const + { + return builtin_options_type() == BuiltinOptions_MulOptions + ? static_cast<const MulOptions *>(builtin_options()) + : nullptr; + } + const PadOptions *builtin_options_as_PadOptions() const + { + return builtin_options_type() == BuiltinOptions_PadOptions + ? 
static_cast<const PadOptions *>(builtin_options()) + : nullptr; + } + const GatherOptions *builtin_options_as_GatherOptions() const + { + return builtin_options_type() == BuiltinOptions_GatherOptions + ? static_cast<const GatherOptions *>(builtin_options()) + : nullptr; + } + const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const + { + return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions + ? static_cast<const BatchToSpaceNDOptions *>(builtin_options()) + : nullptr; + } + const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const + { + return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions + ? static_cast<const SpaceToBatchNDOptions *>(builtin_options()) + : nullptr; + } + const TransposeOptions *builtin_options_as_TransposeOptions() const + { + return builtin_options_type() == BuiltinOptions_TransposeOptions + ? static_cast<const TransposeOptions *>(builtin_options()) + : nullptr; + } + const ReducerOptions *builtin_options_as_ReducerOptions() const + { + return builtin_options_type() == BuiltinOptions_ReducerOptions + ? static_cast<const ReducerOptions *>(builtin_options()) + : nullptr; + } + const SubOptions *builtin_options_as_SubOptions() const + { + return builtin_options_type() == BuiltinOptions_SubOptions + ? static_cast<const SubOptions *>(builtin_options()) + : nullptr; + } + const DivOptions *builtin_options_as_DivOptions() const + { + return builtin_options_type() == BuiltinOptions_DivOptions + ? static_cast<const DivOptions *>(builtin_options()) + : nullptr; + } + const SqueezeOptions *builtin_options_as_SqueezeOptions() const + { + return builtin_options_type() == BuiltinOptions_SqueezeOptions + ? static_cast<const SqueezeOptions *>(builtin_options()) + : nullptr; + } + const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const + { + return builtin_options_type() == BuiltinOptions_SequenceRNNOptions + ? 
static_cast<const SequenceRNNOptions *>(builtin_options()) + : nullptr; + } + const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const + { + return builtin_options_type() == BuiltinOptions_StridedSliceOptions + ? static_cast<const StridedSliceOptions *>(builtin_options()) + : nullptr; + } + const ExpOptions *builtin_options_as_ExpOptions() const + { + return builtin_options_type() == BuiltinOptions_ExpOptions + ? static_cast<const ExpOptions *>(builtin_options()) + : nullptr; + } + const TopKV2Options *builtin_options_as_TopKV2Options() const + { + return builtin_options_type() == BuiltinOptions_TopKV2Options + ? static_cast<const TopKV2Options *>(builtin_options()) + : nullptr; + } + const SplitOptions *builtin_options_as_SplitOptions() const + { + return builtin_options_type() == BuiltinOptions_SplitOptions + ? static_cast<const SplitOptions *>(builtin_options()) + : nullptr; + } + const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const + { + return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions + ? static_cast<const LogSoftmaxOptions *>(builtin_options()) + : nullptr; + } + const CastOptions *builtin_options_as_CastOptions() const + { + return builtin_options_type() == BuiltinOptions_CastOptions + ? static_cast<const CastOptions *>(builtin_options()) + : nullptr; + } + const DequantizeOptions *builtin_options_as_DequantizeOptions() const + { + return builtin_options_type() == BuiltinOptions_DequantizeOptions + ? static_cast<const DequantizeOptions *>(builtin_options()) + : nullptr; + } + const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const + { + return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions + ? static_cast<const MaximumMinimumOptions *>(builtin_options()) + : nullptr; + } + const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const + { + return builtin_options_type() == BuiltinOptions_ArgMaxOptions + ? 
static_cast<const ArgMaxOptions *>(builtin_options()) + : nullptr; + } + const LessOptions *builtin_options_as_LessOptions() const + { + return builtin_options_type() == BuiltinOptions_LessOptions + ? static_cast<const LessOptions *>(builtin_options()) + : nullptr; + } + const NegOptions *builtin_options_as_NegOptions() const + { + return builtin_options_type() == BuiltinOptions_NegOptions + ? static_cast<const NegOptions *>(builtin_options()) + : nullptr; + } + const PadV2Options *builtin_options_as_PadV2Options() const + { + return builtin_options_type() == BuiltinOptions_PadV2Options + ? static_cast<const PadV2Options *>(builtin_options()) + : nullptr; + } + const GreaterOptions *builtin_options_as_GreaterOptions() const + { + return builtin_options_type() == BuiltinOptions_GreaterOptions + ? static_cast<const GreaterOptions *>(builtin_options()) + : nullptr; + } + const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const + { + return builtin_options_type() == BuiltinOptions_GreaterEqualOptions + ? static_cast<const GreaterEqualOptions *>(builtin_options()) + : nullptr; + } + const LessEqualOptions *builtin_options_as_LessEqualOptions() const + { + return builtin_options_type() == BuiltinOptions_LessEqualOptions + ? static_cast<const LessEqualOptions *>(builtin_options()) + : nullptr; + } + const SelectOptions *builtin_options_as_SelectOptions() const + { + return builtin_options_type() == BuiltinOptions_SelectOptions + ? static_cast<const SelectOptions *>(builtin_options()) + : nullptr; + } + const SliceOptions *builtin_options_as_SliceOptions() const + { + return builtin_options_type() == BuiltinOptions_SliceOptions + ? static_cast<const SliceOptions *>(builtin_options()) + : nullptr; + } + const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const + { + return builtin_options_type() == BuiltinOptions_TransposeConvOptions + ? 
static_cast<const TransposeConvOptions *>(builtin_options()) + : nullptr; + } + const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const + { + return builtin_options_type() == BuiltinOptions_SparseToDenseOptions + ? static_cast<const SparseToDenseOptions *>(builtin_options()) + : nullptr; + } + const TileOptions *builtin_options_as_TileOptions() const + { + return builtin_options_type() == BuiltinOptions_TileOptions + ? static_cast<const TileOptions *>(builtin_options()) + : nullptr; + } + const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const + { + return builtin_options_type() == BuiltinOptions_ExpandDimsOptions + ? static_cast<const ExpandDimsOptions *>(builtin_options()) + : nullptr; + } + const EqualOptions *builtin_options_as_EqualOptions() const + { + return builtin_options_type() == BuiltinOptions_EqualOptions + ? static_cast<const EqualOptions *>(builtin_options()) + : nullptr; + } + const NotEqualOptions *builtin_options_as_NotEqualOptions() const + { + return builtin_options_type() == BuiltinOptions_NotEqualOptions + ? static_cast<const NotEqualOptions *>(builtin_options()) + : nullptr; + } + const ShapeOptions *builtin_options_as_ShapeOptions() const + { + return builtin_options_type() == BuiltinOptions_ShapeOptions + ? static_cast<const ShapeOptions *>(builtin_options()) + : nullptr; + } + const PowOptions *builtin_options_as_PowOptions() const + { + return builtin_options_type() == BuiltinOptions_PowOptions + ? static_cast<const PowOptions *>(builtin_options()) + : nullptr; + } + const ArgMinOptions *builtin_options_as_ArgMinOptions() const + { + return builtin_options_type() == BuiltinOptions_ArgMinOptions + ? static_cast<const ArgMinOptions *>(builtin_options()) + : nullptr; + } + const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const + { + return builtin_options_type() == BuiltinOptions_FakeQuantOptions + ? 
static_cast<const FakeQuantOptions *>(builtin_options()) + : nullptr; + } + const PackOptions *builtin_options_as_PackOptions() const + { + return builtin_options_type() == BuiltinOptions_PackOptions + ? static_cast<const PackOptions *>(builtin_options()) + : nullptr; + } + const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const + { + return builtin_options_type() == BuiltinOptions_LogicalOrOptions + ? static_cast<const LogicalOrOptions *>(builtin_options()) + : nullptr; + } + const OneHotOptions *builtin_options_as_OneHotOptions() const + { + return builtin_options_type() == BuiltinOptions_OneHotOptions + ? static_cast<const OneHotOptions *>(builtin_options()) + : nullptr; + } + const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const + { + return builtin_options_type() == BuiltinOptions_LogicalAndOptions + ? static_cast<const LogicalAndOptions *>(builtin_options()) + : nullptr; + } + const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const + { + return builtin_options_type() == BuiltinOptions_LogicalNotOptions + ? static_cast<const LogicalNotOptions *>(builtin_options()) + : nullptr; + } + const UnpackOptions *builtin_options_as_UnpackOptions() const + { + return builtin_options_type() == BuiltinOptions_UnpackOptions + ? static_cast<const UnpackOptions *>(builtin_options()) + : nullptr; + } + const FloorDivOptions *builtin_options_as_FloorDivOptions() const + { + return builtin_options_type() == BuiltinOptions_FloorDivOptions + ? static_cast<const FloorDivOptions *>(builtin_options()) + : nullptr; + } + const SquareOptions *builtin_options_as_SquareOptions() const + { + return builtin_options_type() == BuiltinOptions_SquareOptions + ? static_cast<const SquareOptions *>(builtin_options()) + : nullptr; + } + const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const + { + return builtin_options_type() == BuiltinOptions_ZerosLikeOptions + ? 
static_cast<const ZerosLikeOptions *>(builtin_options()) + : nullptr; + } + const FillOptions *builtin_options_as_FillOptions() const + { + return builtin_options_type() == BuiltinOptions_FillOptions + ? static_cast<const FillOptions *>(builtin_options()) + : nullptr; + } + const BidirectionalSequenceLSTMOptions * + builtin_options_as_BidirectionalSequenceLSTMOptions() const + { + return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions + ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; + } + const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const + { + return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions + ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) + : nullptr; + } + const UnidirectionalSequenceLSTMOptions * + builtin_options_as_UnidirectionalSequenceLSTMOptions() const + { + return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions + ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; + } + const FloorModOptions *builtin_options_as_FloorModOptions() const + { + return builtin_options_type() == BuiltinOptions_FloorModOptions + ? static_cast<const FloorModOptions *>(builtin_options()) + : nullptr; + } + const RangeOptions *builtin_options_as_RangeOptions() const + { + return builtin_options_type() == BuiltinOptions_RangeOptions + ? static_cast<const RangeOptions *>(builtin_options()) + : nullptr; + } + const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const + { + return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions + ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options()) + : nullptr; + } + const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const + { + return builtin_options_type() == BuiltinOptions_LeakyReluOptions + ? 
static_cast<const LeakyReluOptions *>(builtin_options()) + : nullptr; + } + const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const + { + return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions + ? static_cast<const SquaredDifferenceOptions *>(builtin_options()) + : nullptr; + } + const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const + { + return builtin_options_type() == BuiltinOptions_MirrorPadOptions + ? static_cast<const MirrorPadOptions *>(builtin_options()) + : nullptr; + } + const AbsOptions *builtin_options_as_AbsOptions() const + { + return builtin_options_type() == BuiltinOptions_AbsOptions + ? static_cast<const AbsOptions *>(builtin_options()) + : nullptr; + } + const SplitVOptions *builtin_options_as_SplitVOptions() const + { + return builtin_options_type() == BuiltinOptions_SplitVOptions + ? static_cast<const SplitVOptions *>(builtin_options()) + : nullptr; + } + const InstanceNormOptions *builtin_options_as_InstanceNormOptions() const + { + return builtin_options_type() == BuiltinOptions_InstanceNormOptions + ? 
static_cast<const InstanceNormOptions *>(builtin_options()) + : nullptr; + } + const flatbuffers::Vector<uint8_t> *custom_options() const + { + return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS); + } + CustomOptionsFormat custom_options_format() const + { + return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0)); + } + const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const + { + return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) && + VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) && + VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) && + VerifyOffset(verifier, VT_BUILTIN_OPTIONS) && + VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) && + VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && verifier.VerifyVector(custom_options()) && + VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) && + VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) && + verifier.VerifyVector(mutating_variable_inputs()) && verifier.EndTable(); + } +}; + +template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const +{ + return builtin_options_as_Conv2DOptions(); +} + +template <> +inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const +{ + return builtin_options_as_DepthwiseConv2DOptions(); +} + +template <> +inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const +{ + return builtin_options_as_ConcatEmbeddingsOptions(); +} + +template <> +inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const +{ + return builtin_options_as_LSHProjectionOptions(); +} + +template <> 
inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const +{ + return builtin_options_as_Pool2DOptions(); +} + +template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const +{ + return builtin_options_as_SVDFOptions(); +} + +template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const +{ + return builtin_options_as_RNNOptions(); +} + +template <> +inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const +{ + return builtin_options_as_FullyConnectedOptions(); +} + +template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const +{ + return builtin_options_as_SoftmaxOptions(); +} + +template <> +inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const +{ + return builtin_options_as_ConcatenationOptions(); +} + +template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const +{ + return builtin_options_as_AddOptions(); +} + +template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const +{ + return builtin_options_as_L2NormOptions(); +} + +template <> +inline const LocalResponseNormalizationOptions * +Operator::builtin_options_as<LocalResponseNormalizationOptions>() const +{ + return builtin_options_as_LocalResponseNormalizationOptions(); +} + +template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const +{ + return builtin_options_as_LSTMOptions(); +} + +template <> +inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const +{ + return builtin_options_as_ResizeBilinearOptions(); +} + +template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const +{ + return builtin_options_as_CallOptions(); +} + +template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const +{ + return builtin_options_as_ReshapeOptions(); +} + +template 
<> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const +{ + return builtin_options_as_SkipGramOptions(); +} + +template <> +inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const +{ + return builtin_options_as_SpaceToDepthOptions(); +} + +template <> +inline const EmbeddingLookupSparseOptions * +Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const +{ + return builtin_options_as_EmbeddingLookupSparseOptions(); +} + +template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const +{ + return builtin_options_as_MulOptions(); +} + +template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const +{ + return builtin_options_as_PadOptions(); +} + +template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const +{ + return builtin_options_as_GatherOptions(); +} + +template <> +inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const +{ + return builtin_options_as_BatchToSpaceNDOptions(); +} + +template <> +inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const +{ + return builtin_options_as_SpaceToBatchNDOptions(); +} + +template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const +{ + return builtin_options_as_TransposeOptions(); +} + +template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const +{ + return builtin_options_as_ReducerOptions(); +} + +template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const +{ + return builtin_options_as_SubOptions(); +} + +template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const +{ + return builtin_options_as_DivOptions(); +} + +template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const +{ + return builtin_options_as_SqueezeOptions(); +} + +template 
<> +inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const +{ + return builtin_options_as_SequenceRNNOptions(); +} + +template <> +inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const +{ + return builtin_options_as_StridedSliceOptions(); +} + +template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const +{ + return builtin_options_as_ExpOptions(); +} + +template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const +{ + return builtin_options_as_TopKV2Options(); +} + +template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const +{ + return builtin_options_as_SplitOptions(); +} + +template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const +{ + return builtin_options_as_LogSoftmaxOptions(); +} + +template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const +{ + return builtin_options_as_CastOptions(); +} + +template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const +{ + return builtin_options_as_DequantizeOptions(); +} + +template <> +inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const +{ + return builtin_options_as_MaximumMinimumOptions(); +} + +template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const +{ + return builtin_options_as_ArgMaxOptions(); +} + +template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const +{ + return builtin_options_as_LessOptions(); +} + +template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const +{ + return builtin_options_as_NegOptions(); +} + +template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const +{ + return builtin_options_as_PadV2Options(); +} + +template <> inline const GreaterOptions 
*Operator::builtin_options_as<GreaterOptions>() const +{ + return builtin_options_as_GreaterOptions(); +} + +template <> +inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const +{ + return builtin_options_as_GreaterEqualOptions(); +} + +template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const +{ + return builtin_options_as_LessEqualOptions(); +} + +template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const +{ + return builtin_options_as_SelectOptions(); +} + +template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const +{ + return builtin_options_as_SliceOptions(); +} + +template <> +inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const +{ + return builtin_options_as_TransposeConvOptions(); +} + +template <> +inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const +{ + return builtin_options_as_SparseToDenseOptions(); +} + +template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const +{ + return builtin_options_as_TileOptions(); +} + +template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const +{ + return builtin_options_as_ExpandDimsOptions(); +} + +template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const +{ + return builtin_options_as_EqualOptions(); +} + +template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const +{ + return builtin_options_as_NotEqualOptions(); +} + +template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const +{ + return builtin_options_as_ShapeOptions(); +} + +template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const +{ + return builtin_options_as_PowOptions(); +} + +template <> inline const ArgMinOptions 
*Operator::builtin_options_as<ArgMinOptions>() const +{ + return builtin_options_as_ArgMinOptions(); +} + +template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const +{ + return builtin_options_as_FakeQuantOptions(); +} + +template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const +{ + return builtin_options_as_PackOptions(); +} + +template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const +{ + return builtin_options_as_LogicalOrOptions(); +} + +template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const +{ + return builtin_options_as_OneHotOptions(); +} + +template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const +{ + return builtin_options_as_LogicalAndOptions(); +} + +template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const +{ + return builtin_options_as_LogicalNotOptions(); +} + +template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const +{ + return builtin_options_as_UnpackOptions(); +} + +template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const +{ + return builtin_options_as_FloorDivOptions(); +} + +template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const +{ + return builtin_options_as_SquareOptions(); +} + +template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const +{ + return builtin_options_as_ZerosLikeOptions(); +} + +template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const +{ + return builtin_options_as_FillOptions(); +} + +template <> +inline const BidirectionalSequenceLSTMOptions * +Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const +{ + return builtin_options_as_BidirectionalSequenceLSTMOptions(); +} + +template <> +inline const 
BidirectionalSequenceRNNOptions * +Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const +{ + return builtin_options_as_BidirectionalSequenceRNNOptions(); +} + +template <> +inline const UnidirectionalSequenceLSTMOptions * +Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const +{ + return builtin_options_as_UnidirectionalSequenceLSTMOptions(); +} + +template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const +{ + return builtin_options_as_FloorModOptions(); +} + +template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const +{ + return builtin_options_as_RangeOptions(); +} + +template <> +inline const ResizeNearestNeighborOptions * +Operator::builtin_options_as<ResizeNearestNeighborOptions>() const +{ + return builtin_options_as_ResizeNearestNeighborOptions(); +} + +template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const +{ + return builtin_options_as_LeakyReluOptions(); +} + +template <> +inline const SquaredDifferenceOptions * +Operator::builtin_options_as<SquaredDifferenceOptions>() const +{ + return builtin_options_as_SquaredDifferenceOptions(); +} + +template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const +{ + return builtin_options_as_MirrorPadOptions(); +} + +template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const +{ + return builtin_options_as_AbsOptions(); +} + +template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const +{ + return builtin_options_as_SplitVOptions(); +} + +template <> +inline const InstanceNormOptions *Operator::builtin_options_as<InstanceNormOptions>() const +{ + return builtin_options_as_InstanceNormOptions(); +} + +struct OperatorBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_opcode_index(uint32_t opcode_index) + { + 
fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0); + } + void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) + { + fbb_.AddOffset(Operator::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) + { + fbb_.AddOffset(Operator::VT_OUTPUTS, outputs); + } + void add_builtin_options_type(BuiltinOptions builtin_options_type) + { + fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE, + static_cast<uint8_t>(builtin_options_type), 0); + } + void add_builtin_options(flatbuffers::Offset<void> builtin_options) + { + fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options); + } + void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options) + { + fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options); + } + void add_custom_options_format(CustomOptionsFormat custom_options_format) + { + fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT, + static_cast<int8_t>(custom_options_format), 0); + } + void add_mutating_variable_inputs( + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) + { + fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); + } + explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + OperatorBuilder &operator=(const OperatorBuilder &); + flatbuffers::Offset<Operator> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Operator>(end); + return o; + } +}; + +inline flatbuffers::Offset<Operator> +CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + BuiltinOptions builtin_options_type = BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0, + 
CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0) +{ + OperatorBuilder builder_(_fbb); + builder_.add_mutating_variable_inputs(mutating_variable_inputs); + builder_.add_custom_options(custom_options); + builder_.add_builtin_options(builtin_options); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_opcode_index(opcode_index); + builder_.add_custom_options_format(custom_options_format); + builder_.add_builtin_options_type(builtin_options_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset<Operator> +CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + const std::vector<int32_t> *inputs = nullptr, + const std::vector<int32_t> *outputs = nullptr, + BuiltinOptions builtin_options_type = BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + const std::vector<uint8_t> *custom_options = nullptr, + CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS, + const std::vector<uint8_t> *mutating_variable_inputs = nullptr) +{ + return circle::CreateOperator( + _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, + outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options, + custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format, + mutating_variable_inputs ? 
_fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0); +} + +struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_TENSORS = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_OPERATORS = 10, + VT_NAME = 12, + VT_DATA_FORMAT = 14 + }; + const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS); + } + const flatbuffers::Vector<int32_t> *inputs() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS); + } + const flatbuffers::Vector<int32_t> *outputs() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS); + } + const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS); + } + const flatbuffers::String *name() const + { + return GetPointer<const flatbuffers::String *>(VT_NAME); + } + DataFormat data_format() const + { + return static_cast<DataFormat>(GetField<int8_t>(VT_DATA_FORMAT, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) && + verifier.VerifyVector(tensors()) && verifier.VerifyVectorOfTables(tensors()) && + VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) && + VerifyOffset(verifier, VT_OPERATORS) && verifier.VerifyVector(operators()) && + verifier.VerifyVectorOfTables(operators()) && VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && VerifyField<int8_t>(verifier, VT_DATA_FORMAT) && + verifier.EndTable(); + } +}; + +struct SubGraphBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors) + { + fbb_.AddOffset(SubGraph::VT_TENSORS, 
tensors); + } + void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) + { + fbb_.AddOffset(SubGraph::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) + { + fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); + } + void + add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators) + { + fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); + } + void add_name(flatbuffers::Offset<flatbuffers::String> name) + { + fbb_.AddOffset(SubGraph::VT_NAME, name); + } + void add_data_format(DataFormat data_format) + { + fbb_.AddElement<int8_t>(SubGraph::VT_DATA_FORMAT, static_cast<int8_t>(data_format), 0); + } + explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SubGraphBuilder &operator=(const SubGraphBuilder &); + flatbuffers::Offset<SubGraph> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SubGraph>(end); + return o; + } +}; + +inline flatbuffers::Offset<SubGraph> CreateSubGraph( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0, + flatbuffers::Offset<flatbuffers::String> name = 0, + DataFormat data_format = DataFormat_CHANNELS_LAST) +{ + SubGraphBuilder builder_(_fbb); + builder_.add_name(name); + builder_.add_operators(operators); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_tensors(tensors); + builder_.add_data_format(data_format); + return builder_.Finish(); +} + +inline flatbuffers::Offset<SubGraph> +CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr, + const 
std::vector<int32_t> *inputs = nullptr, + const std::vector<int32_t> *outputs = nullptr, + const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, + const char *name = nullptr, DataFormat data_format = DataFormat_CHANNELS_LAST) +{ + return circle::CreateSubGraph( + _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0, + inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, + outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, + operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0, + name ? _fbb.CreateString(name) : 0, data_format); +} + +struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_DATA = 4 + }; + const flatbuffers::Vector<uint8_t> *data() const + { + return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && verifier.EndTable(); + } +}; + +struct BufferBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data) + { + fbb_.AddOffset(Buffer::VT_DATA, data); + } + explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + BufferBuilder &operator=(const BufferBuilder &); + flatbuffers::Offset<Buffer> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Buffer>(end); + return o; + } +}; + +inline flatbuffers::Offset<Buffer> +CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0) +{ + BufferBuilder builder_(_fbb); + builder_.add_data(data); + return builder_.Finish(); +} + +inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<uint8_t> *data = nullptr) +{ + return circle::CreateBuffer(_fbb, data ? 
_fbb.CreateVector<uint8_t>(*data) : 0); +} + +struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_VERSION = 4, + VT_OPERATOR_CODES = 6, + VT_SUBGRAPHS = 8, + VT_DESCRIPTION = 10, + VT_BUFFERS = 12, + VT_METADATA_BUFFER = 14 + }; + uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); } + const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>( + VT_OPERATOR_CODES); + } + const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS); + } + const flatbuffers::String *description() const + { + return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION); + } + const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS); + } + const flatbuffers::Vector<int32_t> *metadata_buffer() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) && + VerifyOffset(verifier, VT_OPERATOR_CODES) && verifier.VerifyVector(operator_codes()) && + verifier.VerifyVectorOfTables(operator_codes()) && + VerifyOffset(verifier, VT_SUBGRAPHS) && verifier.VerifyVector(subgraphs()) && + verifier.VerifyVectorOfTables(subgraphs()) && VerifyOffset(verifier, VT_DESCRIPTION) && + verifier.VerifyString(description()) && VerifyOffset(verifier, VT_BUFFERS) && + verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) && + VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) && + verifier.EndTable(); + } +}; + +struct ModelBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t 
start_; + void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); } + void add_operator_codes( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes) + { + fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); + } + void + add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs) + { + fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); + } + void add_description(flatbuffers::Offset<flatbuffers::String> description) + { + fbb_.AddOffset(Model::VT_DESCRIPTION, description); + } + void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers) + { + fbb_.AddOffset(Model::VT_BUFFERS, buffers); + } + void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer) + { + fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer); + } + explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ModelBuilder &operator=(const ModelBuilder &); + flatbuffers::Offset<Model> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Model>(end); + return o; + } +}; + +inline flatbuffers::Offset<Model> CreateModel( + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0, + flatbuffers::Offset<flatbuffers::String> description = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0) +{ + ModelBuilder builder_(_fbb); + builder_.add_metadata_buffer(metadata_buffer); + builder_.add_buffers(buffers); + builder_.add_description(description); + builder_.add_subgraphs(subgraphs); + builder_.add_operator_codes(operator_codes); + 
builder_.add_version(version); + return builder_.Finish(); +} + +inline flatbuffers::Offset<Model> +CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr, + const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr, + const std::vector<int32_t> *metadata_buffer = nullptr) +{ + return circle::CreateModel( + _fbb, version, + operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0, + subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0, + description ? _fbb.CreateString(description) : 0, + buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0, + metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0); +} + +inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, + QuantizationDetails type) +{ + switch (type) + { + case QuantizationDetails_NONE: + { + return true; + } + case QuantizationDetails_CustomQuantization: + { + auto ptr = reinterpret_cast<const CustomQuantization *>(obj); + return verifier.VerifyTable(ptr); + } + default: + return false; + } +} + +inline bool +VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, + const flatbuffers::Vector<flatbuffers::Offset<void>> *values, + const flatbuffers::Vector<uint8_t> *types) +{ + if (!values || !types) + return !values && !types; + if (values->size() != types->size()) + return false; + for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) + { + if (!VerifyQuantizationDetails(verifier, values->Get(i), + types->GetEnum<QuantizationDetails>(i))) + { + return false; + } + } + return true; +} + +inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, + BuiltinOptions type) +{ + switch (type) + { + case BuiltinOptions_NONE: + { + return 
true; + } + case BuiltinOptions_Conv2DOptions: + { + auto ptr = reinterpret_cast<const Conv2DOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DepthwiseConv2DOptions: + { + auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatEmbeddingsOptions: + { + auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSHProjectionOptions: + { + auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Pool2DOptions: + { + auto ptr = reinterpret_cast<const Pool2DOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SVDFOptions: + { + auto ptr = reinterpret_cast<const SVDFOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RNNOptions: + { + auto ptr = reinterpret_cast<const RNNOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FullyConnectedOptions: + { + auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SoftmaxOptions: + { + auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatenationOptions: + { + auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AddOptions: + { + auto ptr = reinterpret_cast<const AddOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_L2NormOptions: + { + auto ptr = reinterpret_cast<const L2NormOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LocalResponseNormalizationOptions: + { + auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSTMOptions: + { + auto ptr = 
reinterpret_cast<const LSTMOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeBilinearOptions: + { + auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOptions: + { + auto ptr = reinterpret_cast<const CallOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReshapeOptions: + { + auto ptr = reinterpret_cast<const ReshapeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SkipGramOptions: + { + auto ptr = reinterpret_cast<const SkipGramOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToDepthOptions: + { + auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: + { + auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MulOptions: + { + auto ptr = reinterpret_cast<const MulOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PadOptions: + { + auto ptr = reinterpret_cast<const PadOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GatherOptions: + { + auto ptr = reinterpret_cast<const GatherOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BatchToSpaceNDOptions: + { + auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToBatchNDOptions: + { + auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TransposeOptions: + { + auto ptr = reinterpret_cast<const TransposeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReducerOptions: + { + auto ptr = reinterpret_cast<const ReducerOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case 
BuiltinOptions_SubOptions: + { + auto ptr = reinterpret_cast<const SubOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DivOptions: + { + auto ptr = reinterpret_cast<const DivOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SqueezeOptions: + { + auto ptr = reinterpret_cast<const SqueezeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SequenceRNNOptions: + { + auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_StridedSliceOptions: + { + auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpOptions: + { + auto ptr = reinterpret_cast<const ExpOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TopKV2Options: + { + auto ptr = reinterpret_cast<const TopKV2Options *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SplitOptions: + { + auto ptr = reinterpret_cast<const SplitOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogSoftmaxOptions: + { + auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CastOptions: + { + auto ptr = reinterpret_cast<const CastOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DequantizeOptions: + { + auto ptr = reinterpret_cast<const DequantizeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MaximumMinimumOptions: + { + auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMaxOptions: + { + auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessOptions: + { + auto ptr = reinterpret_cast<const LessOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case 
BuiltinOptions_NegOptions: + { + auto ptr = reinterpret_cast<const NegOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PadV2Options: + { + auto ptr = reinterpret_cast<const PadV2Options *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GreaterOptions: + { + auto ptr = reinterpret_cast<const GreaterOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GreaterEqualOptions: + { + auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessEqualOptions: + { + auto ptr = reinterpret_cast<const LessEqualOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SelectOptions: + { + auto ptr = reinterpret_cast<const SelectOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SliceOptions: + { + auto ptr = reinterpret_cast<const SliceOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TransposeConvOptions: + { + auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SparseToDenseOptions: + { + auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TileOptions: + { + auto ptr = reinterpret_cast<const TileOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpandDimsOptions: + { + auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EqualOptions: + { + auto ptr = reinterpret_cast<const EqualOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NotEqualOptions: + { + auto ptr = reinterpret_cast<const NotEqualOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ShapeOptions: + { + auto ptr = reinterpret_cast<const ShapeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + 
case BuiltinOptions_PowOptions: + { + auto ptr = reinterpret_cast<const PowOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMinOptions: + { + auto ptr = reinterpret_cast<const ArgMinOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FakeQuantOptions: + { + auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PackOptions: + { + auto ptr = reinterpret_cast<const PackOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalOrOptions: + { + auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_OneHotOptions: + { + auto ptr = reinterpret_cast<const OneHotOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalAndOptions: + { + auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalNotOptions: + { + auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnpackOptions: + { + auto ptr = reinterpret_cast<const UnpackOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorDivOptions: + { + auto ptr = reinterpret_cast<const FloorDivOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SquareOptions: + { + auto ptr = reinterpret_cast<const SquareOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ZerosLikeOptions: + { + auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FillOptions: + { + auto ptr = reinterpret_cast<const FillOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BidirectionalSequenceLSTMOptions: + { + auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj); + return 
verifier.VerifyTable(ptr); + } + case BuiltinOptions_BidirectionalSequenceRNNOptions: + { + auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: + { + auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorModOptions: + { + auto ptr = reinterpret_cast<const FloorModOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RangeOptions: + { + auto ptr = reinterpret_cast<const RangeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeNearestNeighborOptions: + { + auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LeakyReluOptions: + { + auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SquaredDifferenceOptions: + { + auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MirrorPadOptions: + { + auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AbsOptions: + { + auto ptr = reinterpret_cast<const AbsOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SplitVOptions: + { + auto ptr = reinterpret_cast<const SplitVOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_InstanceNormOptions: + { + auto ptr = reinterpret_cast<const InstanceNormOptions *>(obj); + return verifier.VerifyTable(ptr); + } + default: + return false; + } +} + +inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, + const flatbuffers::Vector<flatbuffers::Offset<void>> *values, + const flatbuffers::Vector<uint8_t> *types) +{ + if (!values || !types) + return !values && !types; + if 
(values->size() != types->size()) + return false; + for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) + { + if (!VerifyBuiltinOptions(verifier, values->Get(i), types->GetEnum<BuiltinOptions>(i))) + { + return false; + } + } + return true; +} + +inline const circle::Model *GetModel(const void *buf) +{ + return flatbuffers::GetRoot<circle::Model>(buf); +} + +inline const circle::Model *GetSizePrefixedModel(const void *buf) +{ + return flatbuffers::GetSizePrefixedRoot<circle::Model>(buf); +} + +inline const char *ModelIdentifier() { return "CIR0"; } + +inline bool ModelBufferHasIdentifier(const void *buf) +{ + return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier()); +} + +inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier) +{ + return verifier.VerifyBuffer<circle::Model>(ModelIdentifier()); +} + +inline bool VerifySizePrefixedModelBuffer(flatbuffers::Verifier &verifier) +{ + return verifier.VerifySizePrefixedBuffer<circle::Model>(ModelIdentifier()); +} + +inline const char *ModelExtension() { return "circle"; } + +inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset<circle::Model> root) +{ + fbb.Finish(root, ModelIdentifier()); +} + +inline void FinishSizePrefixedModelBuffer(flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset<circle::Model> root) +{ + fbb.FinishSizePrefixed(root, ModelIdentifier()); +} + +} // namespace circle + +#endif // FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_ diff --git a/runtime/neurun/frontend/nnapi/ANeuralNetworksModel.test.cc b/runtime/neurun/frontend/nnapi/ANeuralNetworksModel.test.cc new file mode 100644 index 000000000..15a279a7e --- /dev/null +++ b/runtime/neurun/frontend/nnapi/ANeuralNetworksModel.test.cc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> + +#include "wrapper/ANeuralNetworksModel.h" + +TEST(MODEL, model_build) +{ + ANeuralNetworksModel model; + ASSERT_EQ(model.isFinished(), false); +} diff --git a/runtime/neurun/frontend/nnapi/CMakeLists.txt b/runtime/neurun/frontend/nnapi/CMakeLists.txt new file mode 100644 index 000000000..3c3411e05 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/CMakeLists.txt @@ -0,0 +1,23 @@ +file(GLOB_RECURSE SOURCES_FRONTEND "*.cc") +file(GLOB_RECURSE TESTS_FRONTEND "*.test.cc") +list(REMOVE_ITEM SOURCES_FRONTEND ${TESTS_FRONTEND}) + +set(LIB_NEURUN neurun) + +add_library(${LIB_NEURUN} SHARED ${SOURCES_FRONTEND}) +target_link_libraries(${LIB_NEURUN} PUBLIC nnfw-nnapi-header) +target_link_libraries(${LIB_NEURUN} PUBLIC neurun_core) # TODO Link PRIVATE neurun_core +target_link_libraries(${LIB_NEURUN} PRIVATE nnfw_common) +target_link_libraries(${LIB_NEURUN} PRIVATE nnfw_coverage) + +set_target_properties(${LIB_NEURUN} PROPERTIES OUTPUT_NAME neuralnetworks) + +install(TARGETS ${LIB_NEURUN} DESTINATION lib) + +add_executable(test_neurun_frontend_nnapi ${TESTS_FRONTEND}) + +target_link_libraries(test_neurun_frontend_nnapi PRIVATE ${LIB_NEURUN} dl) +target_link_libraries(test_neurun_frontend_nnapi PRIVATE gtest) +target_link_libraries(test_neurun_frontend_nnapi PRIVATE gtest_main) + +install(TARGETS test_neurun_frontend_nnapi DESTINATION unittest) diff --git a/runtime/neurun/frontend/nnapi/compilation.cc b/runtime/neurun/frontend/nnapi/compilation.cc new file mode 100644 index 000000000..164158f19 --- /dev/null +++ 
b/runtime/neurun/frontend/nnapi/compilation.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <NeuralNetworks.h> + +#include <new> + +#include "wrapper/ANeuralNetworksModel.h" +#include "wrapper/ANeuralNetworksCompilation.h" +#include "util/logging.h" + +// +// NNAPI Implementation +// +int ANeuralNetworksCompilation_create(ANeuralNetworksModel *model, + ANeuralNetworksCompilation **compilation) +{ + if ((model == nullptr) || (compilation == nullptr)) + { + VERBOSE(NNAPI::Compilation) << "create: Incorrect null pointer parameter(s)" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (!model->isFinished()) + { + VERBOSE(NNAPI::Compilation) << "create: Model define is not finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + std::shared_ptr<neurun::ir::Graph> internal; + + model->release(internal); + + *compilation = new (std::nothrow) ANeuralNetworksCompilation(internal); + if (*compilation == nullptr) + { + VERBOSE(NNAPI::Compilation) << "create: ail to create compilation object" << std::endl; + return ANEURALNETWORKS_OUT_OF_MEMORY; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation) +{ + if (compilation == nullptr) + { + VERBOSE(NNAPI::Compilation) << "finish: Incorrect null pointer parameter" << std::endl; + return 
ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (compilation->state() != ::neurun::compiler::State::CREATED) + { + VERBOSE(NNAPI::Compilation) << "finish: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + if (!compilation->finish()) + { + VERBOSE(NNAPI::Compilation) << "finish: Fail to compile" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation) +{ + delete compilation; +} + +int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compilation, + int32_t preference) +{ + if (compilation == nullptr) + { + VERBOSE(NNAPI::Compilation) << "setPreference: Incorrect null pointer parameter" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (compilation->state() != ::neurun::compiler::State::CREATED) + { + VERBOSE(NNAPI::Compilation) << "setPreference: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + const PreferenceCode FIRST_PREFERENCE_CODE = ANEURALNETWORKS_PREFER_LOW_POWER; + const PreferenceCode LAST_PREFERENCE_CODE = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED; + if ((preference < FIRST_PREFERENCE_CODE) || (preference > LAST_PREFERENCE_CODE)) + { + VERBOSE(NNAPI::Compilation) << "setPreference: Incorrect preference code" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + // NYI: nothing to set + return ANEURALNETWORKS_NO_ERROR; +} diff --git a/runtime/neurun/frontend/nnapi/event.cc b/runtime/neurun/frontend/nnapi/event.cc new file mode 100644 index 000000000..593b74e90 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/event.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <NeuralNetworks.h> + +#include "wrapper/ANeuralNetworksEvent.h" + +int ANeuralNetworksEvent_wait(ANeuralNetworksEvent *event) +{ + if (event == nullptr) + { + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (!event->waitFinish()) + { + return ANEURALNETWORKS_BAD_STATE; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +void ANeuralNetworksEvent_free(ANeuralNetworksEvent *event) { delete event; } diff --git a/runtime/neurun/frontend/nnapi/execution.cc b/runtime/neurun/frontend/nnapi/execution.cc new file mode 100644 index 000000000..08f2df4c2 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/execution.cc @@ -0,0 +1,480 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <NeuralNetworks.h> + +#include <new> + +#include "wrapper/ANeuralNetworksCompilation.h" +#include "wrapper/ANeuralNetworksExecution.h" +#include "wrapper/ANeuralNetworksMemory.h" +#include "wrapper/ANeuralNetworksEvent.h" +#include "wrapper/NNAPIConvert.h" +#include "util/logging.h" + +// +// NNAPI Implementation +// +int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation, + ANeuralNetworksExecution **execution) +{ + if ((compilation == nullptr) || (execution == nullptr)) + { + VERBOSE(NNAPI::Execution) << "create: Incorrect null pointer parameter(s)" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + std::shared_ptr<neurun::exec::IExecutor> executor; + + compilation->publish(executor); + + if (executor == nullptr) + { + VERBOSE(NNAPI::Execution) << "create: Never compiled yet" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + *execution = new (std::nothrow) ANeuralNetworksExecution{executor}; + if (*execution == nullptr) + { + VERBOSE(NNAPI::Execution) << "create: Fail to create execution object" << std::endl; + return ANEURALNETWORKS_OUT_OF_MEMORY; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +// NOTE Handle optional input +// Unspecified shape on model build +// Optional and omitted input on execution: skip input setting (workaround for LSTM) +// Optional but not omitted input on execution: cannot handle +// Normal input on execution: cannot handle +// Fully specified shape on model build +// Optional input on execution: cannot handle +// Normal input: handle normally +int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index, + const ANeuralNetworksOperandType *type, const void *buffer, + size_t length) +{ + // Don't check type + // Comment about ANeuralNetworksOperandType in NeuralNetworks.h: + // If the input or output is optional and omitted then it need not have a fully specified tensor + // operand type + if ((execution == nullptr) || ((buffer == nullptr) && (length 
!= 0))) + { + VERBOSE(NNAPI::Execution) << "setInput: Incorrect null pointer parameter(s)" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if ((buffer != nullptr) && (length == 0)) + { + VERBOSE(NNAPI::Execution) << "setInput: Zero length input" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + const auto operand_index = execution->getInputOperandIndex(index); + if (!operand_index.valid()) + { + VERBOSE(NNAPI::Execution) << "setInput: Invalid input index" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + // Omitted optional input + // LSTM operation's some inputs can be optional input + if ((buffer == nullptr) && (length == 0)) + { + if (execution->haveUnspecifiedDims(operand_index)) + { + return ANEURALNETWORKS_NO_ERROR; + } + else + { + VERBOSE(NNAPI::Execution) << "setInput: Cannot handle fully-specified shape on model build " + "but omitted input on execution" + << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + if (type != nullptr) + { + if (!execution->compareDataType(type, operand_index)) + { + VERBOSE(NNAPI::Execution) << "setInput: Data type mismatch" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!execution->compareShape(type, operand_index)) + { + VERBOSE(NNAPI::Execution) << "setInput: Shape mismatch" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (NNAPIConvert::calculateSizeFromType(type) != length) + { + VERBOSE(NNAPI::Execution) << "setInput: Invalid length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + else + { + if (execution->haveUnspecifiedDims(operand_index)) + { + VERBOSE(NNAPI::Execution) << "setInput: Unspecified dimension value" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (execution->getOperandSize(operand_index) != length) + { + VERBOSE(NNAPI::Execution) << "setInput: Invalid length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + if (!execution->setInput(index, type, buffer, length)) + { + VERBOSE(NNAPI::Execution) << "setInput: 
Fail to set input" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index, + const ANeuralNetworksOperandType *type, void *buffer, + size_t length) +{ + // Don't check type + // Comment about ANeuralNetworksOperandType in NeuralNetworks.h: + // If the input or output is optional and omitted then it need not have a fully specified tensor + // operand type + if ((execution == nullptr) || ((buffer == nullptr) && (length != 0))) + { + VERBOSE(NNAPI::Execution) << "setOutput: Incorrect null pointer parameter(s)" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if ((buffer != nullptr) && (length == 0)) + { + VERBOSE(NNAPI::Execution) << "setOutput: Zero length output" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + // Handle optional output + if (buffer == nullptr) + { + return ANEURALNETWORKS_NO_ERROR; + } + + const auto operand_index = execution->getOutputOperandIndex(index); + if (!operand_index.valid()) + { + VERBOSE(NNAPI::Execution) << "setOutput: Invalid output index" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (type != nullptr) + { + if (!execution->compareDataType(type, operand_index)) + { + VERBOSE(NNAPI::Execution) << "setOutput: Data type mismatch" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!execution->compareShape(type, operand_index)) + { + VERBOSE(NNAPI::Execution) << "setOutput: Shape mismatch" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (NNAPIConvert::calculateSizeFromType(type) != length) + { + VERBOSE(NNAPI::Execution) << "setOutput: Invalid length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + else + { + if (execution->haveUnspecifiedDims(operand_index)) + { + VERBOSE(NNAPI::Execution) << "setOutput: Unspecified dimension value" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (execution->getOperandSize(operand_index) != 
length) + { + VERBOSE(NNAPI::Execution) << "setInput: Invalid length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + if (!execution->setOutput(index, type, buffer, length)) + { + VERBOSE(NNAPI::Execution) << "setOutput: Fail to set output" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution, + ANeuralNetworksEvent **event) +{ + if ((execution == nullptr) || (event == nullptr)) + { + VERBOSE(NNAPI::Execution) << "startCompute: Incorrect null pointer parameter(s)" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + // TODO: Handle event + auto instance = execution->instance(); + *event = new (std::nothrow) ANeuralNetworksEvent{instance}; + if (*event == nullptr) + { + VERBOSE(NNAPI::Execution) << "startCompute: Fail to create event" << std::endl; + return ANEURALNETWORKS_OUT_OF_MEMORY; + } + + if (!execution->startExecute()) + { + VERBOSE(NNAPI::Execution) << "startCompute: Fail to start execution" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksExecution_compute(ANeuralNetworksExecution *execution) +{ + if (execution == nullptr) + { + VERBOSE(NNAPI::Execution) << "Compute: Incorrect null pointer parameter" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (!execution->execute()) + { + VERBOSE(NNAPI::Execution) << "Compute: Fail to execution" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution) { delete execution; } + +int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution, int32_t index, + const ANeuralNetworksOperandType *type, + const ANeuralNetworksMemory *memory, size_t offset, + size_t length) +{ + if ((execution == nullptr) || (memory == nullptr)) + { + VERBOSE(NNAPI::Execution) << 
"setInputFromMemory: Incorrect null pointer parameter(s)" + << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (length == 0) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Zero length input" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + const auto operand_index = execution->getInputOperandIndex(index); + if (!operand_index.valid()) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid input index" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (type != nullptr) + { + if (!execution->compareDataType(type, operand_index)) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Data type mismatch" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!execution->compareShape(type, operand_index)) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Shape mismatch" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (NNAPIConvert::calculateSizeFromType(type) != length) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + else + { + if (execution->haveUnspecifiedDims(operand_index)) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Unspecified dimension value" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (execution->getOperandSize(operand_index) != length) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + if (!memory->vaildAccess(offset, length)) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid memory access" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!execution->setInput(index, type, reinterpret_cast<const void *>(memory->base() + offset), + length)) + { + VERBOSE(NNAPI::Execution) << "setInputFromMemory: Fail to set input" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int 
ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution, int32_t index, + const ANeuralNetworksOperandType *type, + const ANeuralNetworksMemory *memory, size_t offset, + size_t length) +{ + if ((execution == nullptr) || (memory == nullptr)) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Incorrect null pointer parameter(s)" + << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (length == 0) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Zero length input" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + const auto operand_index = execution->getOutputOperandIndex(index); + if (!operand_index.valid()) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid output index" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (type != nullptr) + { + if (!execution->compareDataType(type, operand_index)) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Data type mismatch" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!execution->compareShape(type, operand_index)) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Shape mismatch" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (NNAPIConvert::calculateSizeFromType(type) != length) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + else + { + if (execution->haveUnspecifiedDims(operand_index)) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Unspecified dimension value" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (execution->getOperandSize(operand_index) != length) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + if (!memory->vaildAccess(offset, length)) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid memory access" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if 
(!execution->setOutput(index, type, reinterpret_cast<void *>(memory->base() + offset), length)) + { + VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Fail to set input" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution *execution, + int32_t index, uint32_t *rank) +{ + if ((execution == nullptr) || (rank == nullptr)) + { + VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Incorrect null pointer parameter(s)" + << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + const auto operand_index = execution->getOutputOperandIndex(index); + if (!operand_index.valid()) + { + VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Invalid output index" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!execution->getOutputOperandRank(index, rank)) + { + VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Fail to get rank" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution *execution, + int32_t index, uint32_t *dimensions) +{ + if ((execution == nullptr) || (dimensions == nullptr)) + { + VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Incorrect null pointer parameter(s)" + << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + const auto operand_index = execution->getOutputOperandIndex(index); + if (!operand_index.valid()) + { + VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Invalid output index" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!execution->getOutputOperandDimensions(index, dimensions)) + { + VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Fail to get rank" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + return ANEURALNETWORKS_NO_ERROR; +} diff --git a/runtime/neurun/frontend/nnapi/memory.cc b/runtime/neurun/frontend/nnapi/memory.cc new 
file mode 100644 index 000000000..fbe1a48e8 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/memory.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <NeuralNetworks.h> +#include <sys/mman.h> +#include <new> +#include <memory> + +#include "cpp14/memory.h" +#include "wrapper/ANeuralNetworksMemory.h" + +int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset, + ANeuralNetworksMemory **memory) +{ + if (memory == nullptr) + { + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + *memory = new (std::nothrow) ANeuralNetworksMemory{size, protect, fd, offset}; + if (*memory == nullptr) + { + return ANEURALNETWORKS_OUT_OF_MEMORY; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +void ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory) { delete memory; } diff --git a/runtime/neurun/frontend/nnapi/model.cc b/runtime/neurun/frontend/nnapi/model.cc new file mode 100644 index 000000000..72a66e630 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/model.cc @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <NeuralNetworks.h> +#include <NeuralNetworksEx.h> + +#include <new> + +#include "wrapper/ANeuralNetworksModel.h" +#include "wrapper/ANeuralNetworksMemory.h" +#include "util/logging.h" + +int ANeuralNetworksModel_create(ANeuralNetworksModel **model) +{ + if (model == nullptr) + { + VERBOSE(NNAPI::Model) << "create: Incorrect null pointer parameter" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + *model = new (std::nothrow) ANeuralNetworksModel{}; + if (*model == nullptr) + { + VERBOSE(NNAPI::Model) << "create: Fail to create model object" << std::endl; + return ANEURALNETWORKS_OUT_OF_MEMORY; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +void ANeuralNetworksModel_free(ANeuralNetworksModel *model) { delete model; } + +int ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model, + const ANeuralNetworksOperandType *type) +{ + if ((model == nullptr) || (type == nullptr)) + { + VERBOSE(NNAPI::Model) << "addOperand: Incorrect null pointer parameter(s)" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (model->isFinished()) + { + VERBOSE(NNAPI::Model) << "addOperand: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + // scale and zeroPoint should be zero for scalars and non-fixed point tensors + // Quantized: + // scale: a 32 bit floating point value greater than zero + // zeroPoint: a 32 bit integer, in range [0, 255] + if (type->type == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) + { + if (!(type->scale > 0.0f)) + { + VERBOSE(NNAPI::Model) << "addOperand: Incorrect scale value for 
quantization" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if ((type->zeroPoint < 0) || (type->zeroPoint > 255)) + { + VERBOSE(NNAPI::Model) << "addOperand: Incorrect zeroPoint value for quantization" + << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + // NOTE Validation of scale and zeroPoint would be skipped for a while. + // We do not know whether scalar type can have scale and zeroPoint. + // To pass ValidationTest and GeneratedTest, this validation code + // would not be implemented until we can define this issue clearly. + // + // scale and zeroPoint should be zero for scalars and non-fixed point tensors + // else if ((type->scale != 0.0f) || (type->zeroPoint != 0)) + // { + // return ANEURALNETWORKS_BAD_DATA; + // } + + // dimensionCount should be zero for scalars + if ((type->dimensionCount != 0) && + ((type->type == ANEURALNETWORKS_FLOAT32) || (type->type == ANEURALNETWORKS_INT32) || + (type->type == ANEURALNETWORKS_UINT32))) + { + VERBOSE(NNAPI::Model) << "addOperand: Incorrect data type" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!model->addOperand(type)) + { + VERBOSE(NNAPI::Model) << "addOperand: Fail to add operand" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index, + const void *buffer, size_t length) +{ + const bool optional_operand = ((buffer == nullptr) && (length == 0)); + + if ((model == nullptr) || ((buffer == nullptr) && (length != 0))) + { + VERBOSE(NNAPI::Model) << "setOperandValue: Incorrect null pointer parameter(s)" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (model->isFinished()) + { + VERBOSE(NNAPI::Model) << "setOperandValue: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + // Negative index value is not allowed + if (index < 0) + { + VERBOSE(NNAPI::Model) << "setOperandValue: Invalid index value (negative)" << 
std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + // NOTE OperandIndex uses uint32_t as its underlying type as various NNAPI + // functions such as ANeuralNetworksModel_addOperation use uint32_t to represent operand + // index + // ANeuralNetworksModel_setOperandValue, however, uses int32_t to represent operand index. + // + // Below, static_cast<uint32_t>(...) is introduced to eliminate compiler warning. + uint32_t ind = static_cast<uint32_t>(index); + + if (!model->isExistOperand(ind)) + { + VERBOSE(NNAPI::Model) << "setOperandValue: Invalid index value (not exist)" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!optional_operand && (model->operandSize(ind) != length)) + { + VERBOSE(NNAPI::Model) << "setOperandValue: Invalid data length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (model->isUsageSet(ind)) + { + VERBOSE(NNAPI::Model) << "setOperandValue: Already set operand" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + // NNAPI spec in NeuralNetworks.h + // For values of length greater than ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES, + // the application is responsible for not changing the content of this region + // until all executions using this model have completed + bool copy_value = false; + if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) + { + copy_value = true; + } + + if (!model->setOperandValue(ind, buffer, length, optional_operand, copy_value)) + { + VERBOSE(NNAPI::Model) << "setOperandValue: Fail to set operand value" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model, int32_t index, + const ANeuralNetworksMemory *memory, + size_t offset, size_t length) +{ + if ((model == nullptr) || (memory == nullptr)) + { + VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Incorrect null pointer parameter(s)" + << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; 
+ } + + if (model->isFinished()) + { + VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + // Negative index value is not allowed + if (index < 0) + { + VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid index value (negative)" + << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + // NOTE OperandIndex uses uint32_t as its underlying type as various NNAPI + // functions such as ANeuralNetworksModel_addOperation use uint32_t to represent operand + // index + // ANeuralNetworksModel_setOperandValue, however, uses int32_t to represent operand index. + // + // Below, static_cast<uint32_t>(...) is introduced to eliminate compiler warning. + uint32_t ind = static_cast<uint32_t>(index); + + if (!model->isExistOperand(ind)) + { + VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid index value (not exist)" + << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if ((model->operandSize(ind) != length) || (memory->size() < (offset + length))) + { + VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid data length" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (model->isUsageSet(ind)) + { + VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Already set operand" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!model->setOperandValue(ind, memory->base() + offset, length)) + { + VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Fail to set operand value" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model, + ANeuralNetworksOperationType type, uint32_t inputCount, + const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr)) + { + VERBOSE(NNAPI::Model) << "addOperation: Incorrect null pointer parameter(s)" << std::endl; + return 
ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (model->isFinished()) + { + VERBOSE(NNAPI::Model) << "addOperation: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + const ANeuralNetworksOperationType FIRST_OPERATION = ANEURALNETWORKS_ADD; + const ANeuralNetworksOperationType LAST_OPERATION = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR; + if ((type < FIRST_OPERATION) || (type > LAST_OPERATION)) + { + return ANEURALNETWORKS_BAD_DATA; + } + + for (uint32_t i = 0; i < outputCount; i++) + { + if (model->isUsageSet(outputs[i])) + { + VERBOSE(NNAPI::Model) << "addOperation: Already set output operand" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + if (!model->addOperation(type, inputCount, inputs, outputCount, outputs)) + { + VERBOSE(NNAPI::Model) << "addOperation: Fail to add operation" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, + ANeuralNetworksOperationTypeEx type, uint32_t inputCount, + const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr)) + { + VERBOSE(NNAPI::Model) << "addOperation: Incorrect null pointer parameter(s)" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (model->isFinished()) + { + VERBOSE(NNAPI::Model) << "addOperation: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + const ANeuralNetworksOperationTypeEx FIRST_OPERATION = ANEURALNETWORKS_CAST_EX; + const ANeuralNetworksOperationTypeEx LAST_OPERATION = ANEURALNETWORKS_LESS_EX; + if ((type < FIRST_OPERATION) || (type > LAST_OPERATION)) + { + VERBOSE(NNAPI::Model) << "addOperation: Invalid operation type" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + for (uint32_t i = 0; i < outputCount; i++) + { + if (model->isUsageSet(outputs[i])) + { + VERBOSE(NNAPI::Model) << "addOperation: Already set output operand" 
<< std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + if (!model->addOperationEx(type, inputCount, inputs, outputCount, outputs)) + { + VERBOSE(NNAPI::Model) << "addOperation: Fail to add operation" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model, uint32_t inputCount, + const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr)) + { + VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Incorrect null pointer parameter(s)" + << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if (model->isFinished()) + { + VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + for (uint32_t n = 0; n < inputCount; ++n) + { + uint32_t ind = inputs[n]; + if (model->isUsageSet(ind)) + { + VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Already set input operand" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!model->addModelInput(ind)) + { + VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Fail to add input" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + for (uint32_t n = 0; n < outputCount; ++n) + { + uint32_t ind = outputs[n]; + + if (!model->isOperationOutput(ind)) + { + VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Need to set output operand" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + + if (!model->addModelOutput(ind)) + { + VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Fail to add output" << std::endl; + return ANEURALNETWORKS_BAD_DATA; + } + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksModel_finish(ANeuralNetworksModel *model) +{ + if (model == nullptr) + { + VERBOSE(NNAPI::Model) << "finish: Incorrect null pointer parameter" << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + if 
(model->isFinished()) + { + VERBOSE(NNAPI::Model) << "finish: Already finished" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + if (!model->finish()) + { + VERBOSE(NNAPI::Model) << "finish: Fail to generate internal graph" << std::endl; + return ANEURALNETWORKS_BAD_STATE; + } + + return ANEURALNETWORKS_NO_ERROR; +} + +int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel *model, bool) +{ + if (model == nullptr) + { + VERBOSE(NNAPI::Model) << "relaxComputationFloat32toFloat16: Incorrect null pointer parameter" + << std::endl; + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + // NYI: nothing to set + VERBOSE(NNAPI::Model) << "relaxComputationFloat32toFloat16: Do nothing yet" << std::endl; + + return ANEURALNETWORKS_NO_ERROR; +} diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc new file mode 100644 index 000000000..1aa1583aa --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ANeuralNetworksCompilation.h" + +#include "util/logging.h" + +ANeuralNetworksCompilation::ANeuralNetworksCompilation( + const std::shared_ptr<neurun::ir::Graph> &model) noexcept + : _compiler{new neurun::compiler::Compiler{model}} +{ + // DO NOTHING +} + +bool ANeuralNetworksCompilation::finish() noexcept +{ + try + { + _compiler->compile(); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h new file mode 100644 index 000000000..56b402d16 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __COMPILATION_H__ +#define __COMPILATION_H__ + +#include "compiler/Compiler.h" +#include "ir/Graph.h" +#include "exec/IExecutor.h" + +struct ANeuralNetworksCompilation +{ +public: + ANeuralNetworksCompilation(const std::shared_ptr<neurun::ir::Graph> &graph) noexcept; + +public: + bool finish() noexcept; + + neurun::compiler::State state(void) noexcept { return _compiler->state(); } + void publish(std::shared_ptr<neurun::exec::IExecutor> &executor) noexcept + { + _compiler->release(executor); + } + +private: + std::shared_ptr<neurun::compiler::Compiler> _compiler; +}; + +#endif diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc new file mode 100644 index 000000000..b09f9abe6 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ANeuralNetworksEvent.h" + +#include "exec/Execution.h" +#include "util/logging.h" + +ANeuralNetworksEvent::ANeuralNetworksEvent( + const std::shared_ptr<neurun::exec::Execution> &execution) + : _execution{execution} +{ + // DO NOTHING +} + +bool ANeuralNetworksEvent::waitFinish(void) noexcept +{ + try + { + _execution->waitFinish(); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.h new file mode 100644 index 000000000..e499bab77 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __EVENT_H__ +#define __EVENT_H__ + +#include <NeuralNetworks.h> + +#include <memory> + +namespace neurun +{ +namespace exec +{ +class Execution; +} // namespace exec +} // namespace neurun + +struct ANeuralNetworksEvent +{ +public: + ANeuralNetworksEvent(const std::shared_ptr<neurun::exec::Execution> &execution); + +public: + bool waitFinish(void) noexcept; + +private: + const std::shared_ptr<neurun::exec::Execution> _execution; +}; + +#endif diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc new file mode 100644 index 000000000..b8e43a691 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ANeuralNetworksExecution.h" +#include "NNAPIConvert.h" +#include "util/logging.h" + +const neurun::ir::OperandIndex +ANeuralNetworksExecution::getInputOperandIndex(int32_t index) noexcept +{ + if (index < 0) + { + // Negative index: return invalid index + return neurun::ir::OperandIndex{}; + } + + uint32_t cast_index = static_cast<uint32_t>(index); + if (cast_index >= _execution->graph().getInputs().size()) + { + // Return invalid index + return neurun::ir::OperandIndex{}; + } + + neurun::ir::IOIndex input_index{cast_index}; + const auto operand_index = _execution->graph().getInputs().at(input_index); + return operand_index; +} + +const neurun::ir::OperandIndex +ANeuralNetworksExecution::getOutputOperandIndex(int32_t index) noexcept +{ + if (index < 0) + { + // Negative index: return invalid index + return neurun::ir::OperandIndex{}; + } + + uint32_t cast_index = static_cast<uint32_t>(index); + if (cast_index >= _execution->graph().getOutputs().size()) + { + // Return invalid index + return neurun::ir::OperandIndex{}; + } + + neurun::ir::IOIndex output_index{cast_index}; + const auto operand_index = _execution->graph().getOutputs().at(output_index); + return operand_index; +} + +bool ANeuralNetworksExecution::compareDataType(const ANeuralNetworksOperandType *type, + const neurun::ir::OperandIndex index) noexcept +{ + try + { + const auto operand_type = _execution->graph().operands().at(index).typeInfo(); + const auto typeInfo = NNAPIConvert::getTypeInfo(type); + + if (operand_type != typeInfo) + { + // Data type mismatch + return false; + } + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksExecution::compareShape(const ANeuralNetworksOperandType *type, + const neurun::ir::OperandIndex index) noexcept +{ + // Passed shape should be specified + if (haveUnspecifiedDims(index)) + { + return false; + } + + const auto &operand_shape = 
_execution->graph().operands().at(index).shape(); + const auto &shape_from_type = NNAPIConvert::getShape(type); + + return operand_shape == shape_from_type; +} + +bool ANeuralNetworksExecution::haveUnspecifiedDims(const neurun::ir::OperandIndex index) noexcept +{ + const auto operand_shape = _execution->graph().operands().at(index).shape(); + + return operand_shape.num_elements() == 0; +} + +size_t ANeuralNetworksExecution::getOperandSize(const neurun::ir::OperandIndex index) noexcept +{ + try + { + return _execution->graph().operands().at(index).operandSize(); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return 0; + } +} + +bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOperandType *type, + const void *buffer, size_t length) noexcept +{ + try + { + neurun::ir::IOIndex input_index{index}; + const auto operand_index = getInputOperandIndex(index); + + const auto type_info = _execution->graph().operands().at(operand_index).typeInfo(); + const auto shape = (type != nullptr) ? NNAPIConvert::getShape(type) + : _execution->graph().operands().at(operand_index).shape(); + + // NOTE The nnapi does not provide setting io_layout and not support changing layout. In other + // words, we can assume that io_layout from nnapi always is the same as layout of the used + // model. 
+ // TODO Set layout of model + _execution->setInput(input_index, type_info, shape, buffer, length, neurun::ir::Layout::NHWC); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOperandType *type, + void *buffer, size_t length) noexcept +{ + try + { + neurun::ir::IOIndex output_index{index}; + const auto operand_index = getOutputOperandIndex(index); + + const auto type_info = _execution->graph().operands().at(operand_index).typeInfo(); + const auto shape = (type != nullptr) ? NNAPIConvert::getShape(type) + : _execution->graph().operands().at(operand_index).shape(); + + // NOTE The nnapi does not provide setting io_layout and not support changing layout. In other + // words, we can assume that io_layout from nnapi always is the same as layout of the used + // model. + // TODO Set layout of model + _execution->setOutput(output_index, type_info, shape, buffer, length, neurun::ir::Layout::NHWC); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksExecution::startExecute(void) noexcept +{ + try + { + _execution->startExecute(); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksExecution::execute(void) noexcept +{ + try + { + _execution->execute(); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +const std::shared_ptr<neurun::exec::Execution> ANeuralNetworksExecution::instance(void) noexcept +{ + return _execution; +} + +bool ANeuralNetworksExecution::getOutputOperandRank(uint32_t index, uint32_t *rank) noexcept +{ + try + { + neurun::ir::IOIndex output_index{index}; + const auto operand_index = getOutputOperandIndex(index); + 
bool unspecified = haveUnspecifiedDims(operand_index); + + // TODO Get unspecified output operand's rank + if (unspecified) + { + throw std::runtime_error{"Unsupport feature"}; + } + + // Check execution is finished + // Output rank and shape may be decided after execution if output is unspecified operand + if (!_execution->isFinished()) + { + return false; + } + + *rank = _execution->graph().operands().at(operand_index).shape().rank(); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksExecution::getOutputOperandDimensions(uint32_t index, uint32_t *dimensions) +{ + try + { + neurun::ir::IOIndex output_index{index}; + const auto operand_index = getOutputOperandIndex(index); + bool unspecified = haveUnspecifiedDims(operand_index); + if (unspecified) + { + throw std::runtime_error{"NYI: Models with unspecified output dimensions"}; + } + + // Check execution is finished + // Output rank and shape may be decided after execution if output is unspecified operand + if (!_execution->isFinished()) + { + return false; + } + + auto shape = _execution->graph().operands().at(operand_index).shape(); + for (int i = 0; i < shape.rank(); i++) + { + auto dim = shape.dim(i); + + if (dim <= 0) + { + throw std::runtime_error{"Invalid dimension value"}; + } + + dimensions[i] = static_cast<uint32_t>(dim); + } + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.h new file mode 100644 index 000000000..ecffedc0a --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __EXECUTION_H__ +#define __EXECUTION_H__ + +#include <NeuralNetworks.h> + +#include <memory> + +#include "exec/Execution.h" + +struct ANeuralNetworksExecution +{ +public: + ANeuralNetworksExecution(const std::shared_ptr<neurun::exec::IExecutor> &executor) + : _execution{std::make_shared<neurun::exec::Execution>(executor)} + { + // DO NOTHING + } + +public: + bool setInput(uint32_t index, const ANeuralNetworksOperandType *type, const void *buffer, + size_t length) noexcept; + bool setOutput(uint32_t index, const ANeuralNetworksOperandType *type, void *buffer, + size_t length) noexcept; + bool startExecute(void) noexcept; + bool execute(void) noexcept; + + const neurun::ir::OperandIndex getInputOperandIndex(int32_t index) noexcept; + const neurun::ir::OperandIndex getOutputOperandIndex(int32_t index) noexcept; + bool compareDataType(const ANeuralNetworksOperandType *type, + const neurun::ir::OperandIndex index) noexcept; + bool compareShape(const ANeuralNetworksOperandType *type, + const neurun::ir::OperandIndex index) noexcept; + bool haveUnspecifiedDims(const neurun::ir::OperandIndex index) noexcept; + size_t getOperandSize(const neurun::ir::OperandIndex index) noexcept; + const std::shared_ptr<neurun::exec::Execution> instance(void) noexcept; + + /** + * @brief Get output operand's rank + * @param[in] index Output index + * @param[out] rank Output operand's rank + * @return 
@c true if success to get rank, otherwise @c false + */ + bool getOutputOperandRank(uint32_t index, uint32_t *rank) noexcept; + /** + * @brief Get dimensions of the output operand + * @param[in] index Output index + * @param[out] dimensions Output operand's dimensions + * @return @c true if success to get rank, otherwise @c false + * @note This must be called after execution is finished to get resolved output shape + * unspecified in model + */ + bool getOutputOperandDimensions(uint32_t index, uint32_t *dimensions); + +private: + std::shared_ptr<neurun::exec::Execution> _execution; +}; + +#endif diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc new file mode 100644 index 000000000..9cc100585 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <NeuralNetworks.h> +#include <sys/mman.h> + +#include "ANeuralNetworksMemory.h" + +// +// ANeuralNetworksMemory +// +ANeuralNetworksMemory::ANeuralNetworksMemory(size_t size, int protect, int fd, size_t offset) +{ + _base = reinterpret_cast<uint8_t *>(mmap(nullptr, size, protect, MAP_PRIVATE, fd, offset)); + _size = size; +} + +ANeuralNetworksMemory::~ANeuralNetworksMemory() { munmap(reinterpret_cast<void *>(_base), _size); } + +bool ANeuralNetworksMemory::vaildAccess(size_t offset, size_t length) const +{ + if ((offset >= _size) || (length > _size)) + { + return false; + } + + if ((offset + length) >= _size) + { + return false; + } + + return true; +} diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.h new file mode 100644 index 000000000..48a1bc5fc --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MEMORY_H__ +#define __MEMORY_H__ + +#include <cstdint> + +struct ANeuralNetworksMemory +{ +public: + ANeuralNetworksMemory(size_t size, int protect, int fd, size_t offset); + ~ANeuralNetworksMemory(); + +public: + size_t size(void) const { return _size; } + uint8_t *base(void) { return _base; } + uint8_t *base(void) const { return _base; } + bool vaildAccess(size_t offset, size_t length) const; + +private: + size_t _size; + uint8_t *_base; +}; + +#endif // __MEMORY_H__ diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.cc new file mode 100644 index 000000000..5542a2e83 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.cc @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ANeuralNetworksModel.h" +#include "OperationFactory.h" +#include "NNAPIConvert.h" + +#include "ir/Operations.Include.h" +#include "util/logging.h" + +#include "cpp14/memory.h" + +// +// ANeuralNetworksModel +// +ANeuralNetworksModel::ANeuralNetworksModel() noexcept : _optional_operands{}, _operand_usages{} +{ + _graph = std::make_shared<neurun::ir::Graph>(); +} + +bool ANeuralNetworksModel::addOperand(const ANeuralNetworksOperandType *type) noexcept +{ + try + { + const auto shape = NNAPIConvert::getShape(type); + const auto typeInfo = NNAPIConvert::getTypeInfo(type); + _graph->addOperand(shape, typeInfo); + _operand_usages.emplace_back(OperandUsage::NOT_DEFINED); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksModel::setOperandValue(uint32_t index, const void *buffer, size_t length, + bool optional, bool copy) noexcept +{ + const neurun::ir::OperandIndex ind{index}; + + try + { + _operand_usages[index] = OperandUsage::CONSTANT; + + // Remain operands.at(ind).data()->base() as nullptr for optional operand + // This will be filled when model finished + if (optional) + { + setOptionalOperand(ind); + } + + using neurun::ir::CachedData; + using neurun::ir::ExternalData; + if (copy) + { + _graph->operands().at(ind).data( + nnfw::cpp14::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length)); + } + else + { + _graph->operands().at(ind).data(nnfw::cpp14::make_unique<ExternalData>( + reinterpret_cast<const uint8_t *>(buffer), length)); + } + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksModel::addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, + const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) noexcept +{ + try + { + for (uint32_t i = 0; i < outputCount; i++) + { + 
_operand_usages[outputs[i]] = OperandUsage::OPERATION_OUTPUT; + } + + auto &factory = OperationFactory::get(); + OperationFactory::Param param{inputCount, inputs, outputCount, outputs}; + + auto node = factory.create(type, param, _graph->operands()); + _graph->addOperation(std::unique_ptr<neurun::ir::Operation>{node}); + + // TODO Move these codes to delegate.cpp + if (type == ANEURALNETWORKS_FULLY_CONNECTED) + { + const auto &input_operand = + _graph->operands().at(node->getInputs().at(neurun::ir::operation::FullyConnected::INPUT)); + auto &weights_operand = _graph->operands().at( + node->getInputs().at(neurun::ir::operation::FullyConnected::WEIGHT)); + if (input_operand.typeInfo().type() == neurun::ir::DataType::FLOAT32 && + weights_operand.typeInfo().type() == neurun::ir::DataType::QUANT8_ASYMM) + { + weights_operand.type(neurun::ir::DataType::QUANT8_SYMM); + } + } + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksModel::addOperationEx(ANeuralNetworksOperationTypeEx type, uint32_t inputCount, + const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) noexcept +{ + try + { + for (uint32_t i = 0; i < outputCount; i++) + { + _operand_usages[outputs[i]] = OperandUsage::OPERATION_OUTPUT; + } + + auto &factory = OperationFactory::get(); + OperationFactory::Param param{inputCount, inputs, outputCount, outputs}; + + auto node = factory.create(type, param, _graph->operands()); + _graph->addOperation(std::unique_ptr<neurun::ir::Operation>{node}); + } + catch (const std::exception &e) + { + return false; + } + return true; +} + +bool ANeuralNetworksModel::addModelInput(uint32_t index) noexcept +{ + try + { + _operand_usages[index] = OperandUsage::MODEL_INPUT; + + const neurun::ir::OperandIndex ind{index}; + _graph->addInput(ind); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; 
+} +bool ANeuralNetworksModel::addModelOutput(uint32_t index) noexcept +{ + try + { + const neurun::ir::OperandIndex ind{index}; + + // Duplicated output is not allowed + if (_graph->getOutputs().contains(ind)) + { + return false; + } + + _graph->addOutput(ind); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << std::endl; + + return false; + } + + return true; +} + +bool ANeuralNetworksModel::finish() noexcept +{ + try + { + fillOptionalOperand(); + + _graph->finishBuilding(); + + _operand_usages.clear(); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << '\n'; + + return false; + } + + return true; +} + +bool ANeuralNetworksModel::isFinished() noexcept { return !_graph->isBuildingPhase(); } + +bool ANeuralNetworksModel::isExistOperand(uint32_t index) noexcept +{ + return _graph->operands().exist(neurun::ir::OperandIndex{index}); +} + +size_t ANeuralNetworksModel::operandSize(uint32_t index) noexcept +{ + try + { + return _graph->operands().at(neurun::ir::OperandIndex{index}).operandSize(); + } + catch (const std::exception &e) + { + VERBOSE(EXCEPTION) << e.what() << '\n'; + + return 0; + } +} + +bool ANeuralNetworksModel::isUsageSet(uint32_t index) noexcept +{ + return (_operand_usages[index] != OperandUsage::NOT_DEFINED); +} + +bool ANeuralNetworksModel::isOperationOutput(uint32_t index) noexcept +{ + return (_operand_usages[index] == OperandUsage::OPERATION_OUTPUT); +} + +void ANeuralNetworksModel::setOptionalOperand(const neurun::ir::OperandIndex idx) +{ + _optional_operands.insert(idx); +} + +void ANeuralNetworksModel::fillOptionalOperand(void) +{ + _graph->operations().iterate( + [&](const neurun::ir::OperationIndex &, neurun::ir::Operation &node) { + for (auto input : node.getInputs()) + { + // TODO fill default value for optional operands + if (_optional_operands.find(input) != _optional_operands.end()) + { + throw std::runtime_error{"Optional operand is not supported yet"}; + } + } + }); +} diff --git 
a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.h new file mode 100644 index 000000000..d364ee39e --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MODEL_H__ +#define __MODEL_H__ + +#include <unordered_set> +#include <NeuralNetworks.h> +#include <NeuralNetworksEx.h> + +#include "ir/Graph.h" + +struct ANeuralNetworksModel +{ +public: + enum class OperandUsage + { + NOT_DEFINED = 0, + MODEL_INPUT, + CONSTANT, + OPERATION_OUTPUT, + }; + +public: + ANeuralNetworksModel() noexcept; + +public: + bool addOperand(const ANeuralNetworksOperandType *type) noexcept; + bool setOperandValue(uint32_t index, const void *buffer, size_t length, bool optional = false, + bool copy = false) noexcept; + bool addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t *inputs, + uint32_t outputCount, const uint32_t *outputs) noexcept; + bool addOperationEx(ANeuralNetworksOperationTypeEx type, uint32_t inputCount, + const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) noexcept; + bool addModelInput(uint32_t index) noexcept; + bool addModelOutput(uint32_t index) noexcept; + bool finish() noexcept; + + neurun::ir::Graph &deref(void) { return *_graph; } + bool isFinished() 
noexcept; + bool isExistOperand(uint32_t index) noexcept; + size_t operandSize(uint32_t index) noexcept; + bool isUsageSet(uint32_t index) noexcept; + bool isOperationOutput(uint32_t index) noexcept; + void release(std::shared_ptr<neurun::ir::Graph> &graph) { graph = _graph; } + +private: + void setOptionalOperand(const neurun::ir::OperandIndex idx); + void fillOptionalOperand(void); + +private: + std::shared_ptr<neurun::ir::Graph> _graph; + std::unordered_set<neurun::ir::OperandIndex> _optional_operands; + std::vector<OperandUsage> _operand_usages; +}; + +#endif // __MODEL_H__ diff --git a/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.cc b/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.cc new file mode 100644 index 000000000..79589be75 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NNAPIConvert.h" + +#include <numeric> + +using namespace neurun::ir; + +DataType NNAPIConvert::getDataType(OperandCode type) +{ + switch (type) + { + case ANEURALNETWORKS_FLOAT32: + case ANEURALNETWORKS_TENSOR_FLOAT32: + return DataType::FLOAT32; + case ANEURALNETWORKS_INT32: + case ANEURALNETWORKS_TENSOR_INT32: + return DataType::INT32; + case ANEURALNETWORKS_UINT32: + return DataType::UINT32; + case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM: + return DataType::QUANT8_ASYMM; + case ANEURALNETWORKS_TENSOR_QUANT8_SYMM: + return DataType::QUANT8_SYMM; + case ANEURALNETWORKS_BOOL: + case ANEURALNETWORKS_TENSOR_BOOL8: + return DataType::BOOL8; + default: + throw std::runtime_error("Unsupported type"); + } +} + +TypeInfo NNAPIConvert::getTypeInfo(const ANeuralNetworksOperandType *type) +{ + return TypeInfo(getDataType((OperandCode)(type->type)), type->scale, type->zeroPoint); +} + +Shape NNAPIConvert::getShape(const ANeuralNetworksOperandType *type) +{ + Shape shape(type->dimensionCount); + + for (uint32_t axis = 0; axis < type->dimensionCount; ++axis) + { + shape.dim(axis) = type->dimensions[axis]; + } + + return shape; +} + +size_t NNAPIConvert::calculateSizeFromType(const ANeuralNetworksOperandType *type) +{ + auto shape = getShape(type); + auto data_type = getDataType((OperandCode)(type->type)); + + return shape.num_elements() * sizeOfDataType(data_type); +} + +Activation NNAPIConvert::getFusedActivation(FuseCode act) +{ + switch (act) + { + case ANEURALNETWORKS_FUSED_NONE: + return Activation::NONE; + case ANEURALNETWORKS_FUSED_RELU: + return Activation::RELU; + case ANEURALNETWORKS_FUSED_RELU1: + return Activation::RELU1; + case ANEURALNETWORKS_FUSED_RELU6: + return Activation::RELU6; + default: + throw std::runtime_error("Unsupported activation type"); + } +} + +PaddingType NNAPIConvert::getPaddingType(PaddingCode type) +{ + switch (type) + { + case ANEURALNETWORKS_PADDING_SAME: + return PaddingType::SAME; + case ANEURALNETWORKS_PADDING_VALID: + 
return PaddingType::VALID; + default: + throw std::runtime_error("Unsupported type"); + } +} diff --git a/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.h b/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.h new file mode 100644 index 000000000..91f84b983 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file NNAPIConvert.h + * @brief This file contains convereter(s)\n + * from NNAPI frontend's struct to neurun's internal struct + */ +#ifndef __NEURUN_NNAPI_CONVERT_H__ +#define __NEURUN_NNAPI_CONVERT_H__ + +#include <NeuralNetworks.h> + +#include <ir/TypeInfo.h> +#include <ir/Shape.h> +#include <ir/InternalType.h> + +class NNAPIConvert +{ + +public: + /** + * @brief Convert data type from NNAPI to internal data type + * @param[in] type NNAPI's data type + * @return neurun's internal data type + */ + static neurun::ir::DataType getDataType(OperandCode type); + + /** + * @brief Convert operand type info from NNAPI to interanl operand type info + * @param[in] type NNAPI's operand type + * @return neurun's internal operand type info + */ + static neurun::ir::TypeInfo getTypeInfo(const ANeuralNetworksOperandType *type); + + /** + * @brief Convert operand shape info from NNAPI to internal operand shape + * @param[in] type NNAPI's operand type + * @return neurun's internal 
operand shape + */ + static neurun::ir::Shape getShape(const ANeuralNetworksOperandType *type); + + /** + * @brief Calcaulate operand size from NNAPI type + * @param[in] type NNAPI's operand type + * @return Operand size + */ + static size_t calculateSizeFromType(const ANeuralNetworksOperandType *type); + + /** + * @brief Convert NNAPI FuseCode to internal activation type + * @param[in] act NNAPI's FuseCode type + * @return neurun's internal activation type + */ + static neurun::ir::Activation getFusedActivation(FuseCode act); + + /** + * @brief Convert NNAPI PaddingCode to internal padding type + * @param[in] type NNAPI's PaddingCode type + * @return neurun's internal padding type + */ + static neurun::ir::PaddingType getPaddingType(PaddingCode type); +}; + +#endif // __NEURUN_NNAPI_CONVERT_H__ diff --git a/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.cc new file mode 100644 index 000000000..84f876e86 --- /dev/null +++ b/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.cc @@ -0,0 +1,1680 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "OperationFactory.h" +#include "NNAPIConvert.h" + +#include <ir/Operations.Include.h> +#include <string.h> + +namespace +{ +using namespace neurun::ir; + +void replaceDataType(Operands &operands, const OperandIndex &index, const DataType type) +{ + assert(operands.exist(index)); + operands.at(index).type(type); +} + +ExplicitPadding makeExplicitPadding(Operands &operands, const OperandIndex &left_index, + const OperandIndex &right_index, const OperandIndex &top_index, + const OperandIndex &bottom_index) +{ + auto left = operands.at(left_index).asScalar<int32_t>(); + auto right = operands.at(right_index).asScalar<int32_t>(); + auto top = operands.at(top_index).asScalar<int32_t>(); + auto bottom = operands.at(bottom_index).asScalar<int32_t>(); + + if (left < 0 || right < 0 || top < 0 || bottom < 0) + { + throw std::runtime_error{"Cannot handle negative explicit padding value"}; + } + + ExplicitPadding param; + param.left = static_cast<uint32_t>(left); + param.right = static_cast<uint32_t>(right); + param.top = static_cast<uint32_t>(top); + param.bottom = static_cast<uint32_t>(bottom); + + return param; +} + +Stride makeStride(Operands &operands, const OperandIndex &horizontal_index, + const OperandIndex &vertical_index) +{ + auto horizontal = operands.at(horizontal_index).asScalar<int32_t>(); + auto vertical = operands.at(vertical_index).asScalar<int32_t>(); + + if (vertical < 0 || horizontal < 0) + { + throw std::runtime_error{"Cannot handle negative stride value"}; + } + + Stride stride; + stride.horizontal = static_cast<uint32_t>(horizontal); + stride.vertical = static_cast<uint32_t>(vertical); + + return stride; +} + +uint32_t getUint32Scalar(Operands &operands, const OperandIndex index) +{ + auto int32_value = operands.at(index).asScalar<int32_t>(); + if (int32_value < 0) + { + throw std::runtime_error{"Cannot handle negative value"}; + } + + return static_cast<uint32_t>(int32_value); +} + +} // namespace + +OperationFactory 
&OperationFactory::get() +{ + static OperationFactory factory; + return factory; +} + +OperationFactory::OperationFactory() +{ + _map[ANEURALNETWORKS_BATCH_TO_SPACE_ND] = [](const OperationFactory::Param &init_param, + Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + return new operation::BatchToSpaceND{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_DEPTHWISE_CONV_2D] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert((init_param.input_count == 8 || init_param.input_count == 11) && + init_param.output_count == 1); + + // In common + // 0 -> IFM Tensor Index + // 1 -> Kernel Tensor Index + // 2 -> Bias Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::DepthwiseConv2D::Param param; + if (init_param.input_count == 8) + { + // Imlicit Padding case + // Each input should be interpreted as follows: + // + // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index + // 4 -> Stride (width) Index + // 5 -> Stride (height) INdex + // 6 -> Depthwise multiplier + // 7 -> Activation Index + + const auto padding_index = OperandIndex{init_param.inputs[3]}; + const auto hstride_index = OperandIndex{init_param.inputs[4]}; + const auto vstride_index = OperandIndex{init_param.inputs[5]}; + const auto multiplier_index = OperandIndex{init_param.inputs[6]}; + const auto activation_index = OperandIndex{init_param.inputs[7]}; + + param.padding.type = + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.multiplier 
= getUint32Scalar(operands, multiplier_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else + { + // Explicit Padding case + // Each input should be interpreted as follows: + // + // 3 -> Padding On the Left + // 4 -> Padding On the Right + // 5 -> Padding On the Top + // 6 -> Padding On the Bottom + // 7 -> Stride (width) Index + // 8 -> Stride (height) Index + // 9 -> Depthwise multiplier + // 10-> Activation Index + + const auto padding_left_index = OperandIndex{init_param.inputs[3]}; + const auto padding_right_index = OperandIndex{init_param.inputs[4]}; + const auto padding_top_index = OperandIndex{init_param.inputs[5]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[6]}; + const auto hstride_index = OperandIndex{init_param.inputs[7]}; + const auto vstride_index = OperandIndex{init_param.inputs[8]}; + const auto multiplier_index = OperandIndex{init_param.inputs[9]}; + const auto activation_index = OperandIndex{init_param.inputs[10]}; + + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.multiplier = getUint32Scalar(operands, multiplier_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + + return new operation::DepthwiseConv2D{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_MAX_POOL_2D] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 7 || init_param.input_count == 10); + assert(init_param.output_count == 1); + + // In common + // 0 -> IFM Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::MaxPool2D::Param param; + if (init_param.input_count == 
7) // support implicit padding + { + // Each input should be interpreted as follows: + // + // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index + // 2 -> Horizontal (over width) Stride Index + // 3 -> Vertial (over height) Stride Index + // 4 -> Filter Width Index + // 5 -> Filter Height Index + // 6 -> FuseCode (activation) Index + + const auto padding_index = OperandIndex{init_param.inputs[1]}; + const auto hstride_index = OperandIndex{init_param.inputs[2]}; + const auto vstride_index = OperandIndex{init_param.inputs[3]}; + const auto kw_index = OperandIndex{init_param.inputs[4]}; + const auto kh_index = OperandIndex{init_param.inputs[5]}; + const auto activation_index = OperandIndex{init_param.inputs[6]}; + + param.padding.type = + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = operands.at(kh_index).asScalar<uint32_t>(); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else if (init_param.input_count == 10) // support explicit padding + { + // Each input should be interpreted as follows: + // + // 1 -> Padding_left index + // 2 -> Padding_right index + // 3 -> Padding_top index + // 4 -> Padding_bottom index + // 5 -> Horizontal (over width) Stride Index + // 6 -> Vertial (over height) Stride Index + // 7 -> Filter Width Index + // 8 -> Filter Height Index + // 9 -> FuseCode (activation) Index + + const auto padding_left_index = OperandIndex{init_param.inputs[1]}; + const auto padding_right_index = OperandIndex{init_param.inputs[2]}; + const auto padding_top_index = OperandIndex{init_param.inputs[3]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[4]}; + const auto hstride_index = OperandIndex{init_param.inputs[5]}; + const auto vstride_index = 
OperandIndex{init_param.inputs[6]}; + const auto kw_index = OperandIndex{init_param.inputs[7]}; + const auto kh_index = OperandIndex{init_param.inputs[8]}; + const auto activation_index = OperandIndex{init_param.inputs[9]}; + + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = getUint32Scalar(operands, kh_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + return new operation::MaxPool2D{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = [](const OperationFactory::Param &init_param, + Operands &operands) { + // TODO We may reuse code here for MAX_POOL_2D. Seems like these two are identical + assert(init_param.input_count == 7 || init_param.input_count == 10); + assert(init_param.output_count == 1); + + // In common + // 0 -> IFM Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::AvgPool2D::Param param; + if (init_param.input_count == 7) // support implicit padding + { + // Each input should be interpreted as follows: + // + // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index + // 2 -> Horizontal (over width) Stride Index + // 3 -> Vertial (over height) Stride Index + // 4 -> Filter Width Index + // 5 -> Filter Height Index + // 6 -> FuseCode (activation) Index + + const auto padding_index = OperandIndex{init_param.inputs[1]}; + const auto hstride_index = OperandIndex{init_param.inputs[2]}; + const auto vstride_index = OperandIndex{init_param.inputs[3]}; + const auto kw_index = OperandIndex{init_param.inputs[4]}; + const auto kh_index = OperandIndex{init_param.inputs[5]}; + const auto 
activation_index = OperandIndex{init_param.inputs[6]}; + + param.padding.type = + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = getUint32Scalar(operands, kh_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else if (init_param.input_count == 10) // support explicit padding + { + // Each input should be interpreted as follows: + // + // 1 -> Padding_left index + // 2 -> Padding_right index + // 3 -> Padding_top index + // 4 -> Padding_bottom index + // 5 -> Horizontal (over width) Stride Index + // 6 -> Vertial (over height) Stride Index + // 7 -> Filter Width Index + // 8 -> Filter Height Index + // 9 -> FuseCode (activation) Index + + const auto padding_left_index = OperandIndex{init_param.inputs[1]}; + const auto padding_right_index = OperandIndex{init_param.inputs[2]}; + const auto padding_top_index = OperandIndex{init_param.inputs[3]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[4]}; + const auto hstride_index = OperandIndex{init_param.inputs[5]}; + const auto vstride_index = OperandIndex{init_param.inputs[6]}; + const auto kw_index = OperandIndex{init_param.inputs[7]}; + const auto kh_index = OperandIndex{init_param.inputs[8]}; + const auto activation_index = OperandIndex{init_param.inputs[9]}; + + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = getUint32Scalar(operands, kh_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + + return new operation::AvgPool2D{inputs, 
outputs, param}; + }; + + _map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count >= 2); // At least one one input tensor and axis + assert(init_param.output_count == 1); + + // When there are N + 1 inputs, each input should be interpreted as follows: + // + // [0, N) -> Input tensors + // N -> Axis + // + + OperandIndexSequence inputs; + for (uint32_t n = 0; n < init_param.input_count - 1; ++n) + { + inputs.append(OperandIndex{init_param.inputs[n]}); + } + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::Concat::Param param; + const OperandIndex axis_index{init_param.inputs[init_param.input_count - 1]}; + param.axis = operands.at(axis_index).asScalar<int32_t>(); + param.rank = operands.at(outputs.at(0)).shape().rank(); + + return new operation::Concat{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_RESHAPE] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> A tensor, specifying the tensor to be reshaped. + // 1 -> A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32, defining the shape of the output + // tensor + + // TODO Second input should be shape tensor (init_param.inputs[1]) + // Currently unused since assume that it is same with output tensor size + OperandIndexSequence inputs{init_param.inputs[0] /* , init_param.inputs[1] */}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + return new operation::Reshape{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_FULLY_CONNECTED] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 4 && init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> A tensor, specifying the input. 
+ // 1 -> A 2-D tensor, specifying the weights + // 2 -> A 1-D tensor, specifying the bias + // 3 -> An INT32 value, and has to be one of the FuseCode values + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::FullyConnected::Param param; + const auto activation_index = OperandIndex{init_param.inputs[3]}; + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + + return new operation::FullyConnected{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_SOFTMAX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> A 2-D or 4-D tensor, specifying the tensor to be reshaped. + // 1 -> FLOAT32 value, specifying the positive scaling factor for the exponent, beta. + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + const auto beta_index = OperandIndex{init_param.inputs[1]}; + + operation::Softmax::Param param; + param.beta = operands.at(beta_index).asScalar<float>(); + + return new operation::Softmax{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // 0 -> input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + // NNAPI uses QUANT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output + if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT8_ASYMM) + { + replaceDataType(operands, inputs.at(0), DataType::UINT8); + } + if (operands.at(outputs.at(0)).typeInfo().type() == 
DataType::QUANT8_ASYMM) + { + replaceDataType(operands, outputs.at(0), DataType::UINT8); + } + + return new operation::Cast{inputs, outputs}; + }; + + // ANEURALNETWORKS_CAST_EX is deprecated + // TODO Remove ANEURALNETWORKS_CAST_EX + _map[ANEURALNETWORKS_CAST_EX] = _map[ANEURALNETWORKS_CAST]; + + _map[ANEURALNETWORKS_CONV_2D] = [](const OperationFactory::Param &init_param, + Operands &operands) { + using operation::Conv2D; + + // inputCount is either 7 or 10 acccording to NN API specification. + // - Padding is implicit when inputCount is 7 + // - Padding is explicit when inputCount is 10 + assert(init_param.input_count == 7 || init_param.input_count == 10); + assert(init_param.output_count == 1); + + // 0 -> IFM Tensor Index + // 1 -> Kernel Tensor Index + // 2 -> Bias Tensor Index + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + Conv2D::Param param; + + if (init_param.input_count == 7) // support implicit padding + { + // Each input should be interpreted as follows: + // + // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index + // 4 -> Stride (width) Index + // 5 -> Stride (height) INdex + // 6 -> Activation Index + + const auto padding_index = OperandIndex{init_param.inputs[3]}; + const auto hstride_index = OperandIndex{init_param.inputs[4]}; + const auto vstride_index = OperandIndex{init_param.inputs[5]}; + const auto activation_index = OperandIndex{init_param.inputs[6]}; + + param.padding.type = + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else if (init_param.input_count == 10) // support explicit padding + { + // Each input should be interpreted as follows: + // + // 3 -> Padding_left index + // 
4 -> Padding_right index + // 5 -> Padding_top index + // 6 -> Padding_bottom index + // 7 -> Stride (width) Index + // 8 -> Stride (height) INdex + // 9 -> Activation Index + + const auto padding_left_index = OperandIndex{init_param.inputs[3]}; + const auto padding_right_index = OperandIndex{init_param.inputs[4]}; + const auto padding_top_index = OperandIndex{init_param.inputs[5]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[6]}; + const auto hstride_index = OperandIndex{init_param.inputs[7]}; + const auto vstride_index = OperandIndex{init_param.inputs[8]}; + const auto activation_index = OperandIndex{init_param.inputs[9]}; + + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + + return new Conv2D{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Lefthand side operand + // 1 -> Righthand side operand + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::Add::Param param; + + const auto activation_index = OperandIndex{init_param.inputs[2]}; + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + + return new operation::Add{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_REDUCE_SUM_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 3); + assert(init_param.output_count == 1); + + // Each input should 
be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Reduced Axes Tensor Index + // 2 -> keep_dims Index + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + std::vector<std::int32_t> axes = + operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>(); + + operation::ReduceSum::Param param; + param.axes.assign(axes.cbegin(), axes.cend()); + param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>() != 0; + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::ReduceSum{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Lefthand side operand + // 1 -> Righthand side operand + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::Sub::Param param; + + const auto activation_index = OperandIndex{init_param.inputs[2]}; + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + + return new operation::Sub{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Begins Tensor Index + // 2 -> Sizes Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]}; + + operation::Slice::Param param; + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::Slice{inputs, outputs, param}; + }; + + 
_map[ANEURALNETWORKS_STRIDED_SLICE] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 7 && init_param.output_count == 1); + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2], + init_param.inputs[3]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 1 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the starts of + // the dimensions of the input tensor to be sliced. The length must be + // of rank(input0). + // 2 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the ends of + // the dimensions of the input tensor to be sliced. The length must be + // of rank(input0). + // 3 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the strides of + // the dimensions of the input tensor to be sliced. The length must be + // of rank(input0). + // 4 -> An {@link ANEURALNETWORKS_INT32} scalar, begin_mask. If the ith bit + // of begin_mask is set, begin[i] is ignored and the fullest possible + // range in that dimension is used instead. + // 5 -> An {@link ANEURALNETWORKS_INT32} scalar, end_mask. If the ith bit of + // end_mask is set, end[i] is ignored and the fullest possible range in + // that dimension is used instead. + // 6 -> An {@link ANEURALNETWORKS_INT32} scalar, shrink_axis_mask. An int32 + // mask. If the ith bit of shrink_axis_mask is set, it implies that the + // ith specification shrinks the dimensionality by 1. A slice of size 1 + // starting from begin[i] in the dimension must be preserved. 
+ + operation::StridedSlice::Param param; + + param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>(); + param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar<std::int32_t>(); + param.shrink_axis_mask = + operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>(); + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::StridedSlice{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_TRANSPOSE] = [](const OperationFactory::Param &init_param, + Operands &operands) { + // TODO make this work with init_param.input_count == 1 (when permutation vector is optional) + + // Inputs + // 0: An n-D tensor, specifying the tensor to be transposed. + // 1: An optional 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, + // the permutation of the dimensions of the input tensor. + // The returned tensor's dimension i corresponds to the input dimension + // perm[i]. If perm is not given, it is set to (n-1...0), where n is the + // rank of the input tensor. Hence by default, this operation performs a + // regular matrix transpose on 2-D input Tensors. 
+ assert(init_param.input_count == 2); + assert(init_param.output_count == 1); + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + std::vector<std::int32_t> perm = + operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>(); + + operation::Transpose::Param param; + param.perm.assign(perm.cbegin(), perm.cend()); + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::Transpose{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> LHS Tensor Index + // 1 -> RHS Tensor Index + // 2 -> Activation Index + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + operation::Mul::Param param; + + const auto activation_index = OperandIndex{init_param.inputs[2]}; + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + + return new operation::Mul{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 1 || init_param.input_count == 2); + assert(init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> An n-D tensor, the tensor to be squeezed. + // 1 -> An optional 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The dimensions to squeeze. + // If specified only squeezes the dimensions listed. Otherwise, squeezes all dimensions. + // The dimension index starts at 0. An error must be reported if squeezing a dimension that + // is not 1. 
+ + // Add mandatory input index + OperandIndexSequence inputs{init_param.inputs[0]}; + + // Add dims index if specified + operation::Squeeze::Param param{}; + if (init_param.input_count == 2) + { + auto squeeze_dims_idx = OperandIndex{init_param.inputs[1]}; + assert(operands.at(squeeze_dims_idx).shape().rank() == 1); + assert(operands.at(squeeze_dims_idx).shape().dim(0) >= 0); + assert(static_cast<uint32_t>(operands.at(squeeze_dims_idx).shape().dim(0)) <= + sizeof(param.dims)); + param.ndim = operands.at(squeeze_dims_idx).shape().dim(0); + if (param.ndim > 0) + memcpy(param.dims, operands.at(squeeze_dims_idx).data().base(), + param.ndim * sizeof(param.dims[0])); + } + + return new operation::Squeeze{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_TANH] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::Tanh{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_LOGISTIC] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::Logistic{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> LHS Tensor Index + // 1 -> RHS Tensor Index + // 2 -> Activation Index + OperandIndexSequence inputs{init_param.inputs[0], 
init_param.inputs[1]}; + + operation::Div::Param param; + + const auto activation_index = OperandIndex{init_param.inputs[2]}; + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + + return new operation::Div{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_EXP] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::Exp{inputs, outputs}; + }; + + // ANEURALNETWORKS_EXP_EX is deprecated + // TODO Remove ANEURALNETWORKS_EXP_EX + _map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP]; + + _map[ANEURALNETWORKS_GREATER_EQUAL_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input0 Tensor Index + // 1 -> input1 Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + operation::Comparison::Param param; + param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual; + + // Output operand type must be boolean + replaceDataType(operands, outputs.at(0), DataType::BOOL8); + + return new operation::Comparison{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input0 Tensor Index + // 1 -> input1 Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], 
init_param.inputs[1]}; + + operation::Comparison::Param param; + param.comparison_type = operation::Comparison::ComparisonType::Less; + + // Output operand type must be boolean + replaceDataType(operands, outputs.at(0), DataType::BOOL8); + + return new operation::Comparison{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_REDUCE_MAX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + // 2 -> keep_dims Index + OperandIndexSequence inputs{init_param.inputs[0]}; + std::vector<std::int32_t> axes = + operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>(); + + operation::ReduceMax::Param param; + param.axes.assign(axes.cbegin(), axes.cend()); + param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int8_t>() != 0; + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::ReduceMax{inputs, outputs, param}; + }; + + // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated + // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX + _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX]; + + _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + operation::Comparison::Param param; + param.comparison_type = operation::Comparison::ComparisonType::NotEqual; + + // Output operand type must be boolean + replaceDataType(operands, outputs.at(0), DataType::BOOL8); + + return new 
operation::Comparison{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_LOGICAL_AND_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input0 Tensor Index + // 1 -> input1 Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + // This operation's operands must be boolean type. + replaceDataType(operands, inputs.at(0), DataType::BOOL8); + replaceDataType(operands, inputs.at(1), DataType::BOOL8); + replaceDataType(operands, outputs.at(0), DataType::BOOL8); + + return new operation::LogicalAnd{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_RSQRT] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::RSQRT{inputs, outputs}; + }; + + // ANEURALNETWORKS_RSQRT_EX is deprecated + // TODO Remove ANEURALNETWORKS_RSQRT_EX + _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT]; + + _map[ANEURALNETWORKS_RELU] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::ReLU{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence 
outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> IFM Index + // 1 -> Height Index + // 2 -> Width Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + operation::ResizeBilinear::Param param; + param.height_out = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>(); + param.width_out = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>(); + + return new operation::ResizeBilinear{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_RELU1] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::ReLU1{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_RELU6] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::ReLU6{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_RNN] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 6 && init_param.output_count == 2); + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Weights Tensor Index + // 2 -> Recurrent Weights Tensor Index + // 3 -> Bias Tensor Index + // 4 -> Hidden state (in) Index + // 5 -> Activation Index + + OperandIndexSequence inputs; + for (uint32_t n = 0; n < init_param.input_count - 1; ++n) + { + inputs.append(OperandIndex{init_param.inputs[n]}); + } + OperandIndexSequence outputs; + for (uint32_t n = 0; n < init_param.output_count; ++n) + { + 
outputs.append(OperandIndex{init_param.outputs[n]}); + } + + operation::RNN::Param param; + const auto activation_index = OperandIndex{init_param.inputs[5]}; + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + + return new operation::RNN{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // 0 -> input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::Floor{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param, + Operands &) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + // 2 -> Paddings Index + OperandIndexSequence inputs; + for (uint32_t n = 0; n < init_param.input_count; ++n) + { + inputs.append(OperandIndex{init_param.inputs[n]}); + } + + return new operation::SpaceToBatchND{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_SPACE_TO_DEPTH] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + operation::SpaceToDepth::Param param; + param.block_size = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>(); + + return new operation::SpaceToDepth{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_L2_POOL_2D] = [](const 
OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 10 || init_param.input_count == 7); + assert(init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> IFM Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + operation::L2Pool2D::Param param; + + if (init_param.input_count == 7) // Imlicit Padding case + { + // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index + // 2 -> Horizontal (over width) Stride Index + // 3 -> Vertial (over height) Stride Index + // 4 -> Filter Width Index + // 5 -> Filter Height Index + // 6 -> FuseCode (activation) Index + const auto padding_index = OperandIndex{init_param.inputs[1]}; + const auto hstride_index = OperandIndex{init_param.inputs[2]}; + const auto vstride_index = OperandIndex{init_param.inputs[3]}; + const auto kw_index = OperandIndex{init_param.inputs[4]}; + const auto kh_index = OperandIndex{init_param.inputs[5]}; + const auto activation_index = OperandIndex{init_param.inputs[6]}; + + param.padding.type = + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = getUint32Scalar(operands, kh_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else // Explicit Padding case + { + // 1 -> Padding_left index + // 2 -> Padding_right index + // 3 -> Padding_top index + // 4 -> Padding_bottom index + // 5 -> Horizontal (over width) Stride Index + // 6 -> Vertial (over height) Stride Index + // 7 -> Filter Width Index + // 8 -> Filter Height Index + // 9 -> FuseCode (activation) Index + const auto padding_left_index = OperandIndex{init_param.inputs[1]}; + const auto padding_right_index = 
OperandIndex{init_param.inputs[2]}; + const auto padding_top_index = OperandIndex{init_param.inputs[3]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[4]}; + const auto hstride_index = OperandIndex{init_param.inputs[5]}; + const auto vstride_index = OperandIndex{init_param.inputs[6]}; + const auto kw_index = OperandIndex{init_param.inputs[7]}; + const auto kh_index = OperandIndex{init_param.inputs[8]}; + const auto activation_index = OperandIndex{init_param.inputs[9]}; + + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = getUint32Scalar(operands, kh_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + + return new operation::L2Pool2D{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param, + Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Lookups Index + // 1 -> Values Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + return new operation::EmbeddingLookup{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_L2_NORMALIZATION] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // 0 -> input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + operation::L2Normalization::Param param; + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return 
new operation::L2Normalization{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_HASHTABLE_LOOKUP] = [](const OperationFactory::Param &init_param, + Operands &) { + assert(init_param.input_count == 3 && init_param.output_count == 2); + + // Each output should be interpreted as follows: + // + // 0 -> Output Index + // 1 -> Hits Index + OperandIndexSequence outputs{init_param.outputs[0], init_param.outputs[1]}; + + // Each input should be interpreted as follows: + // + // 0 -> Lookups Index + // 1 -> Keys Index + // 2 -> Values Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]}; + + return new operation::HashtableLookup{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_PRELU_EX] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + // 1 -> alpha Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + return new operation::PReLU{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_TRANSPOSE_CONV_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 6 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Output Shape Index + // 1 -> Weights Index + // 2 -> Input Tensor Index + // 3 -> Padding Type + // 4 -> Stride width + // 5 -> Stride height + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]}; + + operation::TransposeConv::Param param; + + const auto padding_index = OperandIndex{init_param.inputs[3]}; + const auto hstride_index = OperandIndex{init_param.inputs[4]}; + const auto vstride_index = OperandIndex{init_param.inputs[5]}; + + param.padding.type = + 
NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + param.stride = makeStride(operands, hstride_index, vstride_index); + + return new operation::TransposeConv{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // 0 -> input Tensor Index + + OperandIndexSequence inputs{init_param.inputs[0]}; + return new operation::SQRT{inputs, outputs}; + }; + + // ANEURALNETWORKS_SQRT_EX is deprecated + // TODO Remove ANEURALNETWORKS_SQRT_EX + _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT]; + + _map[ANEURALNETWORKS_LOGICAL_OR_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input0 Tensor Index + // 1 -> input1 Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + // This operation's operands must be boolean type. + replaceDataType(operands, inputs.at(0), DataType::BOOL8); + replaceDataType(operands, inputs.at(1), DataType::BOOL8); + replaceDataType(operands, outputs.at(0), DataType::BOOL8); + + return new operation::LogicalOr{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_LOGICAL_NOT_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + // This operation's operands must be boolean type. 
+ replaceDataType(operands, inputs.at(0), DataType::BOOL8); + replaceDataType(operands, outputs.at(0), DataType::BOOL8); + + return new operation::LogicalNot{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 23 && init_param.output_count == 4); + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Input to Input Tensor Index + // 2 -> Input to Forget Tensor Index + // 3 -> Input to Cell Tensor Index + // 4 -> Input to Output Tensor Index + // 5 -> Recurrent to Input Weights Tensor Index + // 6 -> Recurrent to Forget Weights Tensor Index + // 7 -> Recurrent to Cell Weights Tensor Index + // 8 -> Recurrent to Output Weights Tensor Index + // 9 -> Cell to Input Weights Tensor Index + // 10 -> Cell to Forget Weights Tensor Index + // 11 -> Cell to Output Weights Tensor Index + // 12 -> Input Gate Bias Tensor Index + // 13 -> Forget Gate Bias Tensor Index + // 14 -> Cell Bias Tensor Index + // 15 -> Output Gate Bias Tensor Index + // 16 -> Projection Weights Tensor Index + // 17 -> Projection Bias Tensor Index + // 18 -> Output State In Tensor Index + // 19 -> Cell State In Tensor Index + OperandIndexSequence inputs; + for (uint32_t n = 0; n < init_param.input_count - 3; ++n) + { + inputs.append(OperandIndex{init_param.inputs[n]}); + } + + // Each output should be interpreted as follows: + // + // 0 -> Scratch Buffer Tensor Index + // 1 -> Output State Out Tensor Index + // 2 -> Cell State Out Tensor Index + // 3 -> Output Tensor Index + OperandIndexSequence outputs; + for (uint32_t n = 0; n < init_param.output_count; ++n) + { + outputs.append(OperandIndex{init_param.outputs[n]}); + } + + operation::LSTM::Param param; + const auto activation_index = OperandIndex{init_param.inputs[20]}; + switch (operands.at(activation_index).asScalar<int32_t>()) + { + case 0: + param.activation = Activation::NONE; + break; + case 1: + 
param.activation = Activation::RELU; + break; + case 2: + param.activation = Activation::RELU1; + break; + case 3: + param.activation = Activation::RELU6; + break; + case 4: + param.activation = Activation::TANH; + break; + case 6: + param.activation = Activation::SIGMOID; + break; + default: + throw std::runtime_error("Unsupported activation type"); + break; + } + param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>(); + param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>(); + + return new operation::LSTM{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input0 Tensor Index + // 1 -> input1 Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + operation::Comparison::Param param; + param.comparison_type = operation::Comparison::ComparisonType::Equal; + + // Output operand type must be boolean + replaceDataType(operands, outputs.at(0), DataType::BOOL8); + + return new operation::Comparison{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_SQUARED_DIFFERENCE_EX] = [](const OperationFactory::Param &init_param, + Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> LHS Tensor Index + // 1 -> RHS Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + return new operation::SquaredDifference{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_TOPK_V2] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 2); 
+ + // Each output should be interpreted as follows: + // + // 0 -> Index for Output Values + // 1 -> Index for Output Indices + OperandIndexSequence outputs{init_param.outputs[0], init_param.outputs[1]}; + + // Each input should be interpreted as follows: + // + // 0 -> Index for Input Data + // 1 -> Index for K + OperandIndexSequence inputs{init_param.inputs[0]}; + + operation::TopKV2::Param param; + param.k = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>(); + + return new operation::TopKV2{inputs, outputs, param}; + }; + + // ANEURALNETWORKS_CAST_EX is deprecated + // TODO Remove ANEURALNETWORKS_CAST_EX + _map[ANEURALNETWORKS_TOPK_V2_EX] = _map[ANEURALNETWORKS_TOPK_V2]; + + _map[ANEURALNETWORKS_GATHER] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + // 1 -> axis Index + // 2 -> indices Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[2]}; + + operation::Gather::Param param; + param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>(); + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::Gather{inputs, outputs, param}; + }; + + // ANEURALNETWORKS_GATHER_EX is deprecated + // TODO Remove ANEURALNETWORKS_GATHER_EX + _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER]; + + _map[ANEURALNETWORKS_NEG] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::Neg{inputs, outputs}; + }; + + // ANEURALNETWORKS_NEG_EX is 
deprecated + // TODO Remove ANEURALNETWORKS_NEG_EX + _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG]; + + _map[ANEURALNETWORKS_ABS] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::Abs{inputs, outputs}; + }; + + // ANEURALNETWORKS_ABS_EX is deprecated + // TODO Remove ANEURALNETWORKS_ABS_EX + _map[ANEURALNETWORKS_ABS_EX] = _map[ANEURALNETWORKS_ABS]; + + _map[ANEURALNETWORKS_ARGMAX_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + operation::ArgMax::Param param; + param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>(); + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::ArgMax{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_DEQUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + return new operation::Dequantize{inputs, outputs}; + }; + + _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be 
interpreted as follows: + // + // 0 -> ifm Tensor Index + // 1 -> axis Tensor Index + // 2 -> keep_dims Index + OperandIndexSequence inputs{init_param.inputs[0]}; + std::vector<std::int32_t> axes = + operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>(); + + operation::Mean::Param param; + param.axes.assign(axes.cbegin(), axes.cend()); + param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>() != 0; + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::Mean{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 5 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + OperandIndexSequence inputs{init_param.inputs[0]}; + + operation::LocalResponseNormalization::Param param; + param.radius = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>(); + param.bias = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<float>(); + param.alpha = operands.at(OperandIndex{init_param.inputs[3]}).asScalar<float>(); + param.beta = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<float>(); + + return new operation::LocalResponseNormalization{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_DEPTH_TO_SPACE] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + OperandIndexSequence inputs{init_param.inputs[0]}; + + operation::DepthToSpace::Param param; + param.block_size = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>(); + + return new operation::DepthToSpace{inputs, outputs, param}; + }; + + 
_map[ANEURALNETWORKS_PACK_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count >= 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + OperandIndexSequence inputs; + for (uint32_t n = 0; n < init_param.input_count - 2; ++n) + { + inputs.append(OperandIndex{init_param.inputs[n]}); + } + + operation::Pack::Param param; + const auto num_index = OperandIndex{init_param.inputs[init_param.input_count - 2]}; + const auto axis_index = OperandIndex{init_param.inputs[init_param.input_count - 1]}; + param.num = operands.at(num_index).asScalar<int32_t>(); + param.axis = operands.at(axis_index).asScalar<int32_t>(); + param.rank = operands.at(outputs.at(0)).shape().rank(); + + return new operation::Pack{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_REDUCE_MIN_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + // 2 -> keep_dims Index + OperandIndexSequence inputs{init_param.inputs[0]}; + std::vector<std::int32_t> axes = + operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>(); + + operation::ReduceMin::Param param; + param.axes.assign(axes.cbegin(), axes.cend()); + param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>() != 0; + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::ReduceMin{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_SPLIT_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 3); + assert(init_param.output_count >= 1); // At least one output tensor and axis + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs; + for 
(uint32_t n = 0; n < init_param.output_count; ++n) + { + outputs.append(OperandIndex{init_param.outputs[n]}); + } + + operation::Split::Param param; + param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>(); + param.num_splits = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<std::int32_t>(); + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::Split{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_UNPACK_EX] = [](const OperationFactory::Param &init_param, + Operands &operands) { + assert(init_param.input_count == 3 && init_param.output_count >= 1); + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs; + for (uint32_t n = 0; n < init_param.output_count; ++n) + { + outputs.append(OperandIndex{init_param.outputs[n]}); + } + + operation::Unpack::Param param; + const auto num_index = OperandIndex{init_param.inputs[1]}; + const auto axis_index = OperandIndex{init_param.inputs[2]}; + param.num = operands.at(num_index).asScalar<int32_t>(); + param.axis = operands.at(axis_index).asScalar<int32_t>(); + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::Unpack{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_PAD] = [](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 2 && init_param.output_count >= 1); + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::Pad::Param param; + param.rank = operands.at(inputs.at(0)).shape().rank(); + + return new operation::Pad{inputs, outputs, param}; + }; + + _map[ANEURALNETWORKS_MINIMUM] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + return 
new operation::Min{inputs, outputs};
  };

  _map[ANEURALNETWORKS_MAXIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
    assert(init_param.input_count == 2 && init_param.output_count == 1);

    // Each input should be interpreted as follows:
    //
    // 0 -> LHS Tensor Index
    // 1 -> RHS Tensor Index
    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
    OperandIndexSequence outputs{init_param.outputs[0]};

    return new operation::Max{inputs, outputs};
  };
}

// Looks up the generator registered for `type` (populated in the constructor
// above) and invokes it to build the operation; the caller takes ownership of
// the returned raw pointer.
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
                                    const OperationFactory::Param &param, Operands &operands)
{
  auto it = _map.find(type);
  if (it == _map.end())
  {
    // No generator registered for this NN API operation type
    throw std::runtime_error("Unsupported operation type: " + std::to_string(type));
  }
  return it->second(param, operands);
}
diff --git a/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.h b/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.h
new file mode 100644
index 000000000..003e4eb7a
--- /dev/null
+++ b/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.h
@@ -0,0 +1,60 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#ifndef __OPERATION_FACTORY_H__ +#define __OPERATION_FACTORY_H__ + +#include <unordered_map> + +#include "ir/Operands.h" +#include "ir/Operation.h" +#include "NeuralNetworks.h" +#include "NeuralNetworksEx.h" + +/** + * @brief A class to create a neurun operation object from NN API input parameters + */ +class OperationFactory +{ +public: + struct Param + { + uint32_t input_count; + const uint32_t *inputs; + uint32_t output_count; + const uint32_t *outputs; + }; + +public: + using Generator = std::function<neurun::ir::Operation *(const OperationFactory::Param &, + neurun::ir::Operands &)>; + +public: + static OperationFactory &get(); + +private: + OperationFactory(); + +public: + neurun::ir::Operation *create(ANeuralNetworksOperationType, const OperationFactory::Param ¶m, + neurun::ir::Operands &operands); + // TODO add "register" method for separating registration, possibly supporting custom-ops + +private: + std::unordered_map<ANeuralNetworksOperationType, Generator> _map; +}; + +#endif // __OPERATION_FACTORY_H__ diff --git a/runtime/neurun/frontend/tflite/CMakeLists.txt b/runtime/neurun/frontend/tflite/CMakeLists.txt new file mode 100644 index 000000000..5157869f3 --- /dev/null +++ b/runtime/neurun/frontend/tflite/CMakeLists.txt @@ -0,0 +1,17 @@ +if(NOT BUILD_TFLITE_LOADER) + return() +endif(NOT BUILD_TFLITE_LOADER) + +nnfw_find_package(FlatBuffersSource REQUIRED) + +set(TFLITE_LOADER_SOURCES src/tflite_loader.cc) + +add_library(tflite_loader SHARED ${TFLITE_LOADER_SOURCES}) + +target_include_directories(tflite_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_include_directories(tflite_loader PRIVATE ${FlatBuffersSource_DIR}/include) + +target_link_libraries(tflite_loader PUBLIC neurun_core) +target_link_libraries(tflite_loader PRIVATE base_loader nnfw_lib_cpp14 nnfw_common nnfw_coverage) + +install(TARGETS tflite_loader DESTINATION lib) diff --git a/runtime/neurun/frontend/tflite/include/tflite_loader.h 
b/runtime/neurun/frontend/tflite/include/tflite_loader.h
new file mode 100644
index 000000000..033230b4b
--- /dev/null
+++ b/runtime/neurun/frontend/tflite/include/tflite_loader.h
@@ -0,0 +1,34 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __TFLITE_TFLITE_LOADER_H__
#define __TFLITE_TFLITE_LOADER_H__

#include "ir/Graph.h"

#include <memory>

namespace neurun
{
namespace tflite_loader
{

/**
 * @brief  Load a TensorFlow Lite flatbuffer model file into a neurun IR graph
 * @param  filename Path of the model file to load
 * @return Owning pointer to the newly built graph
 */
std::unique_ptr<ir::Graph> loadModel(const char *filename);

} // namespace tflite_loader
} // namespace neurun

#endif // __TFLITE_TFLITE_LOADER_H__
diff --git a/runtime/neurun/frontend/tflite/src/tflite_loader.cc b/runtime/neurun/frontend/tflite/src/tflite_loader.cc
new file mode 100644
index 000000000..10a4fc095
--- /dev/null
+++ b/runtime/neurun/frontend/tflite/src/tflite_loader.cc
@@ -0,0 +1,105 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "tflite_loader.h"
#include "base_loader.h"
#include "tflite_schema_generated.h"

namespace neurun
{
namespace tflite_loader
{

namespace
{

// Adapter that maps the neurun_tflite flatbuffer schema types and enum-name
// helpers onto the names base_loader::BaseLoader expects from its "domain"
// template parameter.
struct LoaderDomain
{
  using Verifier = flatbuffers::Verifier;
  using ActivationFunctionType = neurun_tflite::ActivationFunctionType;
  using Buffer = neurun_tflite::Buffer;
  using BuiltinOperator = neurun_tflite::BuiltinOperator;
  using CustomOptionsFormat = neurun_tflite::CustomOptionsFormat;
  using Model = neurun_tflite::Model;
  using Operator = neurun_tflite::Operator;
  using Padding = neurun_tflite::Padding;
  using Pool2DOptions = neurun_tflite::Pool2DOptions;
  using Tensor = neurun_tflite::Tensor;
  using TensorType = neurun_tflite::TensorType;
  using SubGraph = neurun_tflite::SubGraph;

  static const char *EnumNameBuiltinOperator(BuiltinOperator e)
  {
    return neurun_tflite::EnumNameBuiltinOperator(e);
  }
  static const char *EnumNameActivationFunctionType(ActivationFunctionType e)
  {
    return neurun_tflite::EnumNameActivationFunctionType(e);
  }
  static const char *EnumNameTensorType(TensorType e)
  {
    return neurun_tflite::EnumNameTensorType(e);
  }
  static const Model *GetModel(const void *buf) { return neurun_tflite::GetModel(buf); }
  static bool VerifyModelBuffer(Verifier &verifier)
  {
    return neurun_tflite::VerifyModelBuffer(verifier);
  }
};

// Concrete loader: BaseLoader (CRTP) drives the file traversal and calls
// back into loadSubgraph() defined here.
class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain, TFLiteLoader>
{
public:
  using BaseLoader::BaseLoader;

  // Translate one flatbuffer SubGraph into the IR graph: operands first,
  // then graph inputs/outputs, then operations (which reference the operands
  // through _tensor_to_operand).
  void loadSubgraph(const neurun_tflite::SubGraph *subgraph)
  {
    // Load tensors
    _tensor_to_operand.resize(subgraph->tensors()->size());
    for (flatbuffers::uoffset_t i = 0; i < subgraph->tensors()->size(); ++i)
    {
      _tensor_to_operand[i] = loadOperand(subgraph->tensors()->Get(i));
    }
    // Set inputs
    for (const std::int32_t input_ind : *subgraph->inputs())
    {
      _graph.addInput(_tensor_to_operand[input_ind]);
    }
    // Set outputs
    for (const std::int32_t output_ind : *subgraph->outputs())
    {
      _graph.addOutput(_tensor_to_operand[output_ind]);
    }
    // Create operations
    for (const auto *op : *subgraph->operators())
    {
      loadOperation(op);
    }
  }
};

} // namespace

// Builds an empty graph, then lets the loader populate it from `filename`.
// NOTE(review): errors appear to be reported by loadFromFile itself (e.g. by
// throwing) — confirm against base_loader before relying on the return value.
std::unique_ptr<ir::Graph> loadModel(const char *filename)
{
  auto graph = nnfw::cpp14::make_unique<ir::Graph>();
  TFLiteLoader loader(*graph);
  loader.loadFromFile(filename);
  return graph;
}

} // namespace tflite_loader
} // namespace neurun
diff --git a/runtime/neurun/frontend/tflite/src/tflite_schema_generated.h b/runtime/neurun/frontend/tflite/src/tflite_schema_generated.h
new file mode 100644
index 000000000..21669e2ff
--- /dev/null
+++ b/runtime/neurun/frontend/tflite/src/tflite_schema_generated.h
@@ -0,0 +1,7275 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_SCHEMA_NEURUN_TFLITE_H_ +#define FLATBUFFERS_GENERATED_SCHEMA_NEURUN_TFLITE_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace neurun_tflite +{ + +struct CustomQuantization; + +struct QuantizationParameters; + +struct Tensor; + +struct Conv2DOptions; + +struct Pool2DOptions; + +struct DepthwiseConv2DOptions; + +struct ConcatEmbeddingsOptions; + +struct LSHProjectionOptions; + +struct SVDFOptions; + +struct RNNOptions; + +struct SequenceRNNOptions; + +struct BidirectionalSequenceRNNOptions; + +struct FullyConnectedOptions; + +struct SoftmaxOptions; + +struct ConcatenationOptions; + +struct AddOptions; + +struct MulOptions; + +struct L2NormOptions; + +struct LocalResponseNormalizationOptions; + +struct LSTMOptions; + +struct UnidirectionalSequenceLSTMOptions; + +struct BidirectionalSequenceLSTMOptions; + +struct ResizeBilinearOptions; + +struct ResizeNearestNeighborOptions; + +struct CallOptions; + +struct PadOptions; + +struct PadV2Options; + +struct ReshapeOptions; + +struct SpaceToBatchNDOptions; + +struct BatchToSpaceNDOptions; + +struct SkipGramOptions; + +struct SpaceToDepthOptions; + +struct SubOptions; + +struct DivOptions; + +struct TopKV2Options; + +struct EmbeddingLookupSparseOptions; + +struct GatherOptions; + +struct TransposeOptions; + +struct ExpOptions; + +struct ReducerOptions; + +struct SqueezeOptions; + +struct SplitOptions; + +struct SplitVOptions; + +struct StridedSliceOptions; + +struct LogSoftmaxOptions; + +struct CastOptions; + +struct DequantizeOptions; + +struct MaximumMinimumOptions; + +struct TileOptions; + +struct ArgMaxOptions; + +struct ArgMinOptions; + +struct GreaterOptions; + +struct GreaterEqualOptions; + +struct LessOptions; + +struct LessEqualOptions; + +struct NegOptions; + +struct SelectOptions; + +struct SliceOptions; + +struct TransposeConvOptions; + +struct ExpandDimsOptions; + +struct SparseToDenseOptions; + 
+struct EqualOptions; + +struct NotEqualOptions; + +struct ShapeOptions; + +struct PowOptions; + +struct FakeQuantOptions; + +struct PackOptions; + +struct LogicalOrOptions; + +struct OneHotOptions; + +struct AbsOptions; + +struct LogicalAndOptions; + +struct LogicalNotOptions; + +struct UnpackOptions; + +struct FloorDivOptions; + +struct SquareOptions; + +struct ZerosLikeOptions; + +struct FillOptions; + +struct FloorModOptions; + +struct RangeOptions; + +struct LeakyReluOptions; + +struct SquaredDifferenceOptions; + +struct MirrorPadOptions; + +struct OperatorCode; + +struct Operator; + +struct SubGraph; + +struct Buffer; + +struct Model; + +enum TensorType +{ + TensorType_FLOAT32 = 0, + TensorType_FLOAT16 = 1, + TensorType_INT32 = 2, + TensorType_UINT8 = 3, + TensorType_INT64 = 4, + TensorType_STRING = 5, + TensorType_BOOL = 6, + TensorType_INT16 = 7, + TensorType_COMPLEX64 = 8, + TensorType_INT8 = 9, + TensorType_MIN = TensorType_FLOAT32, + TensorType_MAX = TensorType_INT8 +}; + +inline const TensorType (&EnumValuesTensorType())[10] +{ + static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, + TensorType_UINT8, TensorType_INT64, TensorType_STRING, + TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64, + TensorType_INT8}; + return values; +} + +inline const char *const *EnumNamesTensorType() +{ + static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64", "STRING", + "BOOL", "INT16", "COMPLEX64", "INT8", nullptr}; + return names; +} + +inline const char *EnumNameTensorType(TensorType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesTensorType()[index]; +} + +enum QuantizationDetails +{ + QuantizationDetails_NONE = 0, + QuantizationDetails_CustomQuantization = 1, + QuantizationDetails_MIN = QuantizationDetails_NONE, + QuantizationDetails_MAX = QuantizationDetails_CustomQuantization +}; + +inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2] +{ + static const 
QuantizationDetails values[] = {QuantizationDetails_NONE, + QuantizationDetails_CustomQuantization}; + return values; +} + +inline const char *const *EnumNamesQuantizationDetails() +{ + static const char *const names[] = {"NONE", "CustomQuantization", nullptr}; + return names; +} + +inline const char *EnumNameQuantizationDetails(QuantizationDetails e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesQuantizationDetails()[index]; +} + +template <typename T> struct QuantizationDetailsTraits +{ + static const QuantizationDetails enum_value = QuantizationDetails_NONE; +}; + +template <> struct QuantizationDetailsTraits<CustomQuantization> +{ + static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization; +}; + +bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, + QuantizationDetails type); +bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, + const flatbuffers::Vector<flatbuffers::Offset<void>> *values, + const flatbuffers::Vector<uint8_t> *types); + +enum BuiltinOperator +{ + BuiltinOperator_ADD = 0, + BuiltinOperator_AVERAGE_POOL_2D = 1, + BuiltinOperator_CONCATENATION = 2, + BuiltinOperator_CONV_2D = 3, + BuiltinOperator_DEPTHWISE_CONV_2D = 4, + BuiltinOperator_DEQUANTIZE = 6, + BuiltinOperator_EMBEDDING_LOOKUP = 7, + BuiltinOperator_FLOOR = 8, + BuiltinOperator_FULLY_CONNECTED = 9, + BuiltinOperator_HASHTABLE_LOOKUP = 10, + BuiltinOperator_L2_NORMALIZATION = 11, + BuiltinOperator_L2_POOL_2D = 12, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13, + BuiltinOperator_LOGISTIC = 14, + BuiltinOperator_LSH_PROJECTION = 15, + BuiltinOperator_LSTM = 16, + BuiltinOperator_MAX_POOL_2D = 17, + BuiltinOperator_MUL = 18, + BuiltinOperator_RELU = 19, + BuiltinOperator_RELU_N1_TO_1 = 20, + BuiltinOperator_RELU6 = 21, + BuiltinOperator_RESHAPE = 22, + BuiltinOperator_RESIZE_BILINEAR = 23, + BuiltinOperator_RNN = 24, + BuiltinOperator_SOFTMAX = 25, + BuiltinOperator_SPACE_TO_DEPTH = 26, + 
BuiltinOperator_SVDF = 27, + BuiltinOperator_TANH = 28, + BuiltinOperator_CONCAT_EMBEDDINGS = 29, + BuiltinOperator_SKIP_GRAM = 30, + BuiltinOperator_CALL = 31, + BuiltinOperator_CUSTOM = 32, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33, + BuiltinOperator_PAD = 34, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35, + BuiltinOperator_GATHER = 36, + BuiltinOperator_BATCH_TO_SPACE_ND = 37, + BuiltinOperator_SPACE_TO_BATCH_ND = 38, + BuiltinOperator_TRANSPOSE = 39, + BuiltinOperator_MEAN = 40, + BuiltinOperator_SUB = 41, + BuiltinOperator_DIV = 42, + BuiltinOperator_SQUEEZE = 43, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + BuiltinOperator_STRIDED_SLICE = 45, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46, + BuiltinOperator_EXP = 47, + BuiltinOperator_TOPK_V2 = 48, + BuiltinOperator_SPLIT = 49, + BuiltinOperator_LOG_SOFTMAX = 50, + BuiltinOperator_DELEGATE = 51, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, + BuiltinOperator_CAST = 53, + BuiltinOperator_PRELU = 54, + BuiltinOperator_MAXIMUM = 55, + BuiltinOperator_ARG_MAX = 56, + BuiltinOperator_MINIMUM = 57, + BuiltinOperator_LESS = 58, + BuiltinOperator_NEG = 59, + BuiltinOperator_PADV2 = 60, + BuiltinOperator_GREATER = 61, + BuiltinOperator_GREATER_EQUAL = 62, + BuiltinOperator_LESS_EQUAL = 63, + BuiltinOperator_SELECT = 64, + BuiltinOperator_SLICE = 65, + BuiltinOperator_SIN = 66, + BuiltinOperator_TRANSPOSE_CONV = 67, + BuiltinOperator_SPARSE_TO_DENSE = 68, + BuiltinOperator_TILE = 69, + BuiltinOperator_EXPAND_DIMS = 70, + BuiltinOperator_EQUAL = 71, + BuiltinOperator_NOT_EQUAL = 72, + BuiltinOperator_LOG = 73, + BuiltinOperator_SUM = 74, + BuiltinOperator_SQRT = 75, + BuiltinOperator_RSQRT = 76, + BuiltinOperator_SHAPE = 77, + BuiltinOperator_POW = 78, + BuiltinOperator_ARG_MIN = 79, + BuiltinOperator_FAKE_QUANT = 80, + BuiltinOperator_REDUCE_PROD = 81, + BuiltinOperator_REDUCE_MAX = 82, + BuiltinOperator_PACK = 83, + BuiltinOperator_LOGICAL_OR = 84, + BuiltinOperator_ONE_HOT = 85, + 
BuiltinOperator_LOGICAL_AND = 86, + BuiltinOperator_LOGICAL_NOT = 87, + BuiltinOperator_UNPACK = 88, + BuiltinOperator_REDUCE_MIN = 89, + BuiltinOperator_FLOOR_DIV = 90, + BuiltinOperator_REDUCE_ANY = 91, + BuiltinOperator_SQUARE = 92, + BuiltinOperator_ZEROS_LIKE = 93, + BuiltinOperator_FILL = 94, + BuiltinOperator_FLOOR_MOD = 95, + BuiltinOperator_RANGE = 96, + BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97, + BuiltinOperator_LEAKY_RELU = 98, + BuiltinOperator_SQUARED_DIFFERENCE = 99, + BuiltinOperator_MIRROR_PAD = 100, + BuiltinOperator_ABS = 101, + BuiltinOperator_SPLIT_V = 102, + BuiltinOperator_MIN = BuiltinOperator_ADD, + BuiltinOperator_MAX = BuiltinOperator_SPLIT_V +}; + +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[102] +{ + static const BuiltinOperator values[] = {BuiltinOperator_ADD, + BuiltinOperator_AVERAGE_POOL_2D, + BuiltinOperator_CONCATENATION, + BuiltinOperator_CONV_2D, + BuiltinOperator_DEPTHWISE_CONV_2D, + BuiltinOperator_DEQUANTIZE, + BuiltinOperator_EMBEDDING_LOOKUP, + BuiltinOperator_FLOOR, + BuiltinOperator_FULLY_CONNECTED, + BuiltinOperator_HASHTABLE_LOOKUP, + BuiltinOperator_L2_NORMALIZATION, + BuiltinOperator_L2_POOL_2D, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + BuiltinOperator_LOGISTIC, + BuiltinOperator_LSH_PROJECTION, + BuiltinOperator_LSTM, + BuiltinOperator_MAX_POOL_2D, + BuiltinOperator_MUL, + BuiltinOperator_RELU, + BuiltinOperator_RELU_N1_TO_1, + BuiltinOperator_RELU6, + BuiltinOperator_RESHAPE, + BuiltinOperator_RESIZE_BILINEAR, + BuiltinOperator_RNN, + BuiltinOperator_SOFTMAX, + BuiltinOperator_SPACE_TO_DEPTH, + BuiltinOperator_SVDF, + BuiltinOperator_TANH, + BuiltinOperator_CONCAT_EMBEDDINGS, + BuiltinOperator_SKIP_GRAM, + BuiltinOperator_CALL, + BuiltinOperator_CUSTOM, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, + BuiltinOperator_PAD, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_GATHER, + BuiltinOperator_BATCH_TO_SPACE_ND, + BuiltinOperator_SPACE_TO_BATCH_ND, + 
BuiltinOperator_TRANSPOSE, + BuiltinOperator_MEAN, + BuiltinOperator_SUB, + BuiltinOperator_DIV, + BuiltinOperator_SQUEEZE, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_STRIDED_SLICE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_EXP, + BuiltinOperator_TOPK_V2, + BuiltinOperator_SPLIT, + BuiltinOperator_LOG_SOFTMAX, + BuiltinOperator_DELEGATE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_CAST, + BuiltinOperator_PRELU, + BuiltinOperator_MAXIMUM, + BuiltinOperator_ARG_MAX, + BuiltinOperator_MINIMUM, + BuiltinOperator_LESS, + BuiltinOperator_NEG, + BuiltinOperator_PADV2, + BuiltinOperator_GREATER, + BuiltinOperator_GREATER_EQUAL, + BuiltinOperator_LESS_EQUAL, + BuiltinOperator_SELECT, + BuiltinOperator_SLICE, + BuiltinOperator_SIN, + BuiltinOperator_TRANSPOSE_CONV, + BuiltinOperator_SPARSE_TO_DENSE, + BuiltinOperator_TILE, + BuiltinOperator_EXPAND_DIMS, + BuiltinOperator_EQUAL, + BuiltinOperator_NOT_EQUAL, + BuiltinOperator_LOG, + BuiltinOperator_SUM, + BuiltinOperator_SQRT, + BuiltinOperator_RSQRT, + BuiltinOperator_SHAPE, + BuiltinOperator_POW, + BuiltinOperator_ARG_MIN, + BuiltinOperator_FAKE_QUANT, + BuiltinOperator_REDUCE_PROD, + BuiltinOperator_REDUCE_MAX, + BuiltinOperator_PACK, + BuiltinOperator_LOGICAL_OR, + BuiltinOperator_ONE_HOT, + BuiltinOperator_LOGICAL_AND, + BuiltinOperator_LOGICAL_NOT, + BuiltinOperator_UNPACK, + BuiltinOperator_REDUCE_MIN, + BuiltinOperator_FLOOR_DIV, + BuiltinOperator_REDUCE_ANY, + BuiltinOperator_SQUARE, + BuiltinOperator_ZEROS_LIKE, + BuiltinOperator_FILL, + BuiltinOperator_FLOOR_MOD, + BuiltinOperator_RANGE, + BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + BuiltinOperator_LEAKY_RELU, + BuiltinOperator_SQUARED_DIFFERENCE, + BuiltinOperator_MIRROR_PAD, + BuiltinOperator_ABS, + BuiltinOperator_SPLIT_V}; + return values; +} + +inline const char *const *EnumNamesBuiltinOperator() +{ + static const char *const names[] = {"ADD", + "AVERAGE_POOL_2D", + "CONCATENATION", + "CONV_2D", + 
"DEPTHWISE_CONV_2D", + "", + "DEQUANTIZE", + "EMBEDDING_LOOKUP", + "FLOOR", + "FULLY_CONNECTED", + "HASHTABLE_LOOKUP", + "L2_NORMALIZATION", + "L2_POOL_2D", + "LOCAL_RESPONSE_NORMALIZATION", + "LOGISTIC", + "LSH_PROJECTION", + "LSTM", + "MAX_POOL_2D", + "MUL", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "RESHAPE", + "RESIZE_BILINEAR", + "RNN", + "SOFTMAX", + "SPACE_TO_DEPTH", + "SVDF", + "TANH", + "CONCAT_EMBEDDINGS", + "SKIP_GRAM", + "CALL", + "CUSTOM", + "EMBEDDING_LOOKUP_SPARSE", + "PAD", + "UNIDIRECTIONAL_SEQUENCE_RNN", + "GATHER", + "BATCH_TO_SPACE_ND", + "SPACE_TO_BATCH_ND", + "TRANSPOSE", + "MEAN", + "SUB", + "DIV", + "SQUEEZE", + "UNIDIRECTIONAL_SEQUENCE_LSTM", + "STRIDED_SLICE", + "BIDIRECTIONAL_SEQUENCE_RNN", + "EXP", + "TOPK_V2", + "SPLIT", + "LOG_SOFTMAX", + "DELEGATE", + "BIDIRECTIONAL_SEQUENCE_LSTM", + "CAST", + "PRELU", + "MAXIMUM", + "ARG_MAX", + "MINIMUM", + "LESS", + "NEG", + "PADV2", + "GREATER", + "GREATER_EQUAL", + "LESS_EQUAL", + "SELECT", + "SLICE", + "SIN", + "TRANSPOSE_CONV", + "SPARSE_TO_DENSE", + "TILE", + "EXPAND_DIMS", + "EQUAL", + "NOT_EQUAL", + "LOG", + "SUM", + "SQRT", + "RSQRT", + "SHAPE", + "POW", + "ARG_MIN", + "FAKE_QUANT", + "REDUCE_PROD", + "REDUCE_MAX", + "PACK", + "LOGICAL_OR", + "ONE_HOT", + "LOGICAL_AND", + "LOGICAL_NOT", + "UNPACK", + "REDUCE_MIN", + "FLOOR_DIV", + "REDUCE_ANY", + "SQUARE", + "ZEROS_LIKE", + "FILL", + "FLOOR_MOD", + "RANGE", + "RESIZE_NEAREST_NEIGHBOR", + "LEAKY_RELU", + "SQUARED_DIFFERENCE", + "MIRROR_PAD", + "ABS", + "SPLIT_V", + nullptr}; + return names; +} + +inline const char *EnumNameBuiltinOperator(BuiltinOperator e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesBuiltinOperator()[index]; +} + +enum BuiltinOptions +{ + BuiltinOptions_NONE = 0, + BuiltinOptions_Conv2DOptions = 1, + BuiltinOptions_DepthwiseConv2DOptions = 2, + BuiltinOptions_ConcatEmbeddingsOptions = 3, + BuiltinOptions_LSHProjectionOptions = 4, + BuiltinOptions_Pool2DOptions = 5, + BuiltinOptions_SVDFOptions = 6, + 
BuiltinOptions_RNNOptions = 7, + BuiltinOptions_FullyConnectedOptions = 8, + BuiltinOptions_SoftmaxOptions = 9, + BuiltinOptions_ConcatenationOptions = 10, + BuiltinOptions_AddOptions = 11, + BuiltinOptions_L2NormOptions = 12, + BuiltinOptions_LocalResponseNormalizationOptions = 13, + BuiltinOptions_LSTMOptions = 14, + BuiltinOptions_ResizeBilinearOptions = 15, + BuiltinOptions_CallOptions = 16, + BuiltinOptions_ReshapeOptions = 17, + BuiltinOptions_SkipGramOptions = 18, + BuiltinOptions_SpaceToDepthOptions = 19, + BuiltinOptions_EmbeddingLookupSparseOptions = 20, + BuiltinOptions_MulOptions = 21, + BuiltinOptions_PadOptions = 22, + BuiltinOptions_GatherOptions = 23, + BuiltinOptions_BatchToSpaceNDOptions = 24, + BuiltinOptions_SpaceToBatchNDOptions = 25, + BuiltinOptions_TransposeOptions = 26, + BuiltinOptions_ReducerOptions = 27, + BuiltinOptions_SubOptions = 28, + BuiltinOptions_DivOptions = 29, + BuiltinOptions_SqueezeOptions = 30, + BuiltinOptions_SequenceRNNOptions = 31, + BuiltinOptions_StridedSliceOptions = 32, + BuiltinOptions_ExpOptions = 33, + BuiltinOptions_TopKV2Options = 34, + BuiltinOptions_SplitOptions = 35, + BuiltinOptions_LogSoftmaxOptions = 36, + BuiltinOptions_CastOptions = 37, + BuiltinOptions_DequantizeOptions = 38, + BuiltinOptions_MaximumMinimumOptions = 39, + BuiltinOptions_ArgMaxOptions = 40, + BuiltinOptions_LessOptions = 41, + BuiltinOptions_NegOptions = 42, + BuiltinOptions_PadV2Options = 43, + BuiltinOptions_GreaterOptions = 44, + BuiltinOptions_GreaterEqualOptions = 45, + BuiltinOptions_LessEqualOptions = 46, + BuiltinOptions_SelectOptions = 47, + BuiltinOptions_SliceOptions = 48, + BuiltinOptions_TransposeConvOptions = 49, + BuiltinOptions_SparseToDenseOptions = 50, + BuiltinOptions_TileOptions = 51, + BuiltinOptions_ExpandDimsOptions = 52, + BuiltinOptions_EqualOptions = 53, + BuiltinOptions_NotEqualOptions = 54, + BuiltinOptions_ShapeOptions = 55, + BuiltinOptions_PowOptions = 56, + BuiltinOptions_ArgMinOptions = 57, + 
BuiltinOptions_FakeQuantOptions = 58, + BuiltinOptions_PackOptions = 59, + BuiltinOptions_LogicalOrOptions = 60, + BuiltinOptions_OneHotOptions = 61, + BuiltinOptions_LogicalAndOptions = 62, + BuiltinOptions_LogicalNotOptions = 63, + BuiltinOptions_UnpackOptions = 64, + BuiltinOptions_FloorDivOptions = 65, + BuiltinOptions_SquareOptions = 66, + BuiltinOptions_ZerosLikeOptions = 67, + BuiltinOptions_FillOptions = 68, + BuiltinOptions_BidirectionalSequenceLSTMOptions = 69, + BuiltinOptions_BidirectionalSequenceRNNOptions = 70, + BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71, + BuiltinOptions_FloorModOptions = 72, + BuiltinOptions_RangeOptions = 73, + BuiltinOptions_ResizeNearestNeighborOptions = 74, + BuiltinOptions_LeakyReluOptions = 75, + BuiltinOptions_SquaredDifferenceOptions = 76, + BuiltinOptions_MirrorPadOptions = 77, + BuiltinOptions_AbsOptions = 78, + BuiltinOptions_SplitVOptions = 79, + BuiltinOptions_MIN = BuiltinOptions_NONE, + BuiltinOptions_MAX = BuiltinOptions_SplitVOptions +}; + +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[80] +{ + static const BuiltinOptions values[] = {BuiltinOptions_NONE, + BuiltinOptions_Conv2DOptions, + BuiltinOptions_DepthwiseConv2DOptions, + BuiltinOptions_ConcatEmbeddingsOptions, + BuiltinOptions_LSHProjectionOptions, + BuiltinOptions_Pool2DOptions, + BuiltinOptions_SVDFOptions, + BuiltinOptions_RNNOptions, + BuiltinOptions_FullyConnectedOptions, + BuiltinOptions_SoftmaxOptions, + BuiltinOptions_ConcatenationOptions, + BuiltinOptions_AddOptions, + BuiltinOptions_L2NormOptions, + BuiltinOptions_LocalResponseNormalizationOptions, + BuiltinOptions_LSTMOptions, + BuiltinOptions_ResizeBilinearOptions, + BuiltinOptions_CallOptions, + BuiltinOptions_ReshapeOptions, + BuiltinOptions_SkipGramOptions, + BuiltinOptions_SpaceToDepthOptions, + BuiltinOptions_EmbeddingLookupSparseOptions, + BuiltinOptions_MulOptions, + BuiltinOptions_PadOptions, + BuiltinOptions_GatherOptions, + BuiltinOptions_BatchToSpaceNDOptions, + 
BuiltinOptions_SpaceToBatchNDOptions, + BuiltinOptions_TransposeOptions, + BuiltinOptions_ReducerOptions, + BuiltinOptions_SubOptions, + BuiltinOptions_DivOptions, + BuiltinOptions_SqueezeOptions, + BuiltinOptions_SequenceRNNOptions, + BuiltinOptions_StridedSliceOptions, + BuiltinOptions_ExpOptions, + BuiltinOptions_TopKV2Options, + BuiltinOptions_SplitOptions, + BuiltinOptions_LogSoftmaxOptions, + BuiltinOptions_CastOptions, + BuiltinOptions_DequantizeOptions, + BuiltinOptions_MaximumMinimumOptions, + BuiltinOptions_ArgMaxOptions, + BuiltinOptions_LessOptions, + BuiltinOptions_NegOptions, + BuiltinOptions_PadV2Options, + BuiltinOptions_GreaterOptions, + BuiltinOptions_GreaterEqualOptions, + BuiltinOptions_LessEqualOptions, + BuiltinOptions_SelectOptions, + BuiltinOptions_SliceOptions, + BuiltinOptions_TransposeConvOptions, + BuiltinOptions_SparseToDenseOptions, + BuiltinOptions_TileOptions, + BuiltinOptions_ExpandDimsOptions, + BuiltinOptions_EqualOptions, + BuiltinOptions_NotEqualOptions, + BuiltinOptions_ShapeOptions, + BuiltinOptions_PowOptions, + BuiltinOptions_ArgMinOptions, + BuiltinOptions_FakeQuantOptions, + BuiltinOptions_PackOptions, + BuiltinOptions_LogicalOrOptions, + BuiltinOptions_OneHotOptions, + BuiltinOptions_LogicalAndOptions, + BuiltinOptions_LogicalNotOptions, + BuiltinOptions_UnpackOptions, + BuiltinOptions_FloorDivOptions, + BuiltinOptions_SquareOptions, + BuiltinOptions_ZerosLikeOptions, + BuiltinOptions_FillOptions, + BuiltinOptions_BidirectionalSequenceLSTMOptions, + BuiltinOptions_BidirectionalSequenceRNNOptions, + BuiltinOptions_UnidirectionalSequenceLSTMOptions, + BuiltinOptions_FloorModOptions, + BuiltinOptions_RangeOptions, + BuiltinOptions_ResizeNearestNeighborOptions, + BuiltinOptions_LeakyReluOptions, + BuiltinOptions_SquaredDifferenceOptions, + BuiltinOptions_MirrorPadOptions, + BuiltinOptions_AbsOptions, + BuiltinOptions_SplitVOptions}; + return values; +} + +inline const char *const *EnumNamesBuiltinOptions() +{ + static const 
char *const names[] = {"NONE", + "Conv2DOptions", + "DepthwiseConv2DOptions", + "ConcatEmbeddingsOptions", + "LSHProjectionOptions", + "Pool2DOptions", + "SVDFOptions", + "RNNOptions", + "FullyConnectedOptions", + "SoftmaxOptions", + "ConcatenationOptions", + "AddOptions", + "L2NormOptions", + "LocalResponseNormalizationOptions", + "LSTMOptions", + "ResizeBilinearOptions", + "CallOptions", + "ReshapeOptions", + "SkipGramOptions", + "SpaceToDepthOptions", + "EmbeddingLookupSparseOptions", + "MulOptions", + "PadOptions", + "GatherOptions", + "BatchToSpaceNDOptions", + "SpaceToBatchNDOptions", + "TransposeOptions", + "ReducerOptions", + "SubOptions", + "DivOptions", + "SqueezeOptions", + "SequenceRNNOptions", + "StridedSliceOptions", + "ExpOptions", + "TopKV2Options", + "SplitOptions", + "LogSoftmaxOptions", + "CastOptions", + "DequantizeOptions", + "MaximumMinimumOptions", + "ArgMaxOptions", + "LessOptions", + "NegOptions", + "PadV2Options", + "GreaterOptions", + "GreaterEqualOptions", + "LessEqualOptions", + "SelectOptions", + "SliceOptions", + "TransposeConvOptions", + "SparseToDenseOptions", + "TileOptions", + "ExpandDimsOptions", + "EqualOptions", + "NotEqualOptions", + "ShapeOptions", + "PowOptions", + "ArgMinOptions", + "FakeQuantOptions", + "PackOptions", + "LogicalOrOptions", + "OneHotOptions", + "LogicalAndOptions", + "LogicalNotOptions", + "UnpackOptions", + "FloorDivOptions", + "SquareOptions", + "ZerosLikeOptions", + "FillOptions", + "BidirectionalSequenceLSTMOptions", + "BidirectionalSequenceRNNOptions", + "UnidirectionalSequenceLSTMOptions", + "FloorModOptions", + "RangeOptions", + "ResizeNearestNeighborOptions", + "LeakyReluOptions", + "SquaredDifferenceOptions", + "MirrorPadOptions", + "AbsOptions", + "SplitVOptions", + nullptr}; + return names; +} + +inline const char *EnumNameBuiltinOptions(BuiltinOptions e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesBuiltinOptions()[index]; +} + +template <typename T> struct 
BuiltinOptionsTraits +{ + static const BuiltinOptions enum_value = BuiltinOptions_NONE; +}; + +template <> struct BuiltinOptionsTraits<Conv2DOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions; +}; + +template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions; +}; + +template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions; +}; + +template <> struct BuiltinOptionsTraits<LSHProjectionOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions; +}; + +template <> struct BuiltinOptionsTraits<Pool2DOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions; +}; + +template <> struct BuiltinOptionsTraits<SVDFOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions; +}; + +template <> struct BuiltinOptionsTraits<RNNOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions; +}; + +template <> struct BuiltinOptionsTraits<FullyConnectedOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions; +}; + +template <> struct BuiltinOptionsTraits<SoftmaxOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions; +}; + +template <> struct BuiltinOptionsTraits<ConcatenationOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions; +}; + +template <> struct BuiltinOptionsTraits<AddOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_AddOptions; +}; + +template <> struct BuiltinOptionsTraits<L2NormOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions; +}; + +template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions; +}; + 
+template <> struct BuiltinOptionsTraits<LSTMOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions; +}; + +template <> struct BuiltinOptionsTraits<ResizeBilinearOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions; +}; + +template <> struct BuiltinOptionsTraits<CallOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_CallOptions; +}; + +template <> struct BuiltinOptionsTraits<ReshapeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions; +}; + +template <> struct BuiltinOptionsTraits<SkipGramOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions; +}; + +template <> struct BuiltinOptionsTraits<SpaceToDepthOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions; +}; + +template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions; +}; + +template <> struct BuiltinOptionsTraits<MulOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_MulOptions; +}; + +template <> struct BuiltinOptionsTraits<PadOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_PadOptions; +}; + +template <> struct BuiltinOptionsTraits<GatherOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions; +}; + +template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions; +}; + +template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions; +}; + +template <> struct BuiltinOptionsTraits<TransposeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions; +}; + +template <> struct BuiltinOptionsTraits<ReducerOptions> +{ + static const BuiltinOptions enum_value = 
BuiltinOptions_ReducerOptions; +}; + +template <> struct BuiltinOptionsTraits<SubOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SubOptions; +}; + +template <> struct BuiltinOptionsTraits<DivOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_DivOptions; +}; + +template <> struct BuiltinOptionsTraits<SqueezeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions; +}; + +template <> struct BuiltinOptionsTraits<SequenceRNNOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions; +}; + +template <> struct BuiltinOptionsTraits<StridedSliceOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions; +}; + +template <> struct BuiltinOptionsTraits<ExpOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions; +}; + +template <> struct BuiltinOptionsTraits<TopKV2Options> +{ + static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; +}; + +template <> struct BuiltinOptionsTraits<SplitOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions; +}; + +template <> struct BuiltinOptionsTraits<LogSoftmaxOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions; +}; + +template <> struct BuiltinOptionsTraits<CastOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_CastOptions; +}; + +template <> struct BuiltinOptionsTraits<DequantizeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; +}; + +template <> struct BuiltinOptionsTraits<MaximumMinimumOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions; +}; + +template <> struct BuiltinOptionsTraits<ArgMaxOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions; +}; + +template <> struct BuiltinOptionsTraits<LessOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LessOptions; +}; + 
+template <> struct BuiltinOptionsTraits<NegOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_NegOptions; +}; + +template <> struct BuiltinOptionsTraits<PadV2Options> +{ + static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options; +}; + +template <> struct BuiltinOptionsTraits<GreaterOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions; +}; + +template <> struct BuiltinOptionsTraits<GreaterEqualOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions; +}; + +template <> struct BuiltinOptionsTraits<LessEqualOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions; +}; + +template <> struct BuiltinOptionsTraits<SelectOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions; +}; + +template <> struct BuiltinOptionsTraits<SliceOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions; +}; + +template <> struct BuiltinOptionsTraits<TransposeConvOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions; +}; + +template <> struct BuiltinOptionsTraits<SparseToDenseOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions; +}; + +template <> struct BuiltinOptionsTraits<TileOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_TileOptions; +}; + +template <> struct BuiltinOptionsTraits<ExpandDimsOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions; +}; + +template <> struct BuiltinOptionsTraits<EqualOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions; +}; + +template <> struct BuiltinOptionsTraits<NotEqualOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions; +}; + +template <> struct BuiltinOptionsTraits<ShapeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions; +}; + +template <> struct 
BuiltinOptionsTraits<PowOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_PowOptions; +}; + +template <> struct BuiltinOptionsTraits<ArgMinOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions; +}; + +template <> struct BuiltinOptionsTraits<FakeQuantOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions; +}; + +template <> struct BuiltinOptionsTraits<PackOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_PackOptions; +}; + +template <> struct BuiltinOptionsTraits<LogicalOrOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions; +}; + +template <> struct BuiltinOptionsTraits<OneHotOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions; +}; + +template <> struct BuiltinOptionsTraits<LogicalAndOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions; +}; + +template <> struct BuiltinOptionsTraits<LogicalNotOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions; +}; + +template <> struct BuiltinOptionsTraits<UnpackOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions; +}; + +template <> struct BuiltinOptionsTraits<FloorDivOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions; +}; + +template <> struct BuiltinOptionsTraits<SquareOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions; +}; + +template <> struct BuiltinOptionsTraits<ZerosLikeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions; +}; + +template <> struct BuiltinOptionsTraits<FillOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FillOptions; +}; + +template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions; +}; + +template <> struct 
BuiltinOptionsTraits<BidirectionalSequenceRNNOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions; +}; + +template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; +}; + +template <> struct BuiltinOptionsTraits<FloorModOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; +}; + +template <> struct BuiltinOptionsTraits<RangeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions; +}; + +template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions; +}; + +template <> struct BuiltinOptionsTraits<LeakyReluOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions; +}; + +template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions; +}; + +template <> struct BuiltinOptionsTraits<MirrorPadOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions; +}; + +template <> struct BuiltinOptionsTraits<AbsOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions; +}; + +template <> struct BuiltinOptionsTraits<SplitVOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions; +}; + +bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); +bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, + const flatbuffers::Vector<flatbuffers::Offset<void>> *values, + const flatbuffers::Vector<uint8_t> *types); + +enum Padding +{ + Padding_SAME = 0, + Padding_VALID = 1, + Padding_MIN = Padding_SAME, + Padding_MAX = Padding_VALID +}; + +inline const Padding (&EnumValuesPadding())[2] +{ + static const Padding values[] = {Padding_SAME, 
Padding_VALID}; + return values; +} + +inline const char *const *EnumNamesPadding() +{ + static const char *const names[] = {"SAME", "VALID", nullptr}; + return names; +} + +inline const char *EnumNamePadding(Padding e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesPadding()[index]; +} + +enum ActivationFunctionType +{ + ActivationFunctionType_NONE = 0, + ActivationFunctionType_RELU = 1, + ActivationFunctionType_RELU_N1_TO_1 = 2, + ActivationFunctionType_RELU6 = 3, + ActivationFunctionType_TANH = 4, + ActivationFunctionType_SIGN_BIT = 5, + ActivationFunctionType_MIN = ActivationFunctionType_NONE, + ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT +}; + +inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] +{ + static const ActivationFunctionType values[] = { + ActivationFunctionType_NONE, ActivationFunctionType_RELU, + ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, + ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + return values; +} + +inline const char *const *EnumNamesActivationFunctionType() +{ + static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6", + "TANH", "SIGN_BIT", nullptr}; + return names; +} + +inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesActivationFunctionType()[index]; +} + +enum LSHProjectionType +{ + LSHProjectionType_UNKNOWN = 0, + LSHProjectionType_SPARSE = 1, + LSHProjectionType_DENSE = 2, + LSHProjectionType_MIN = LSHProjectionType_UNKNOWN, + LSHProjectionType_MAX = LSHProjectionType_DENSE +}; + +inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3] +{ + static const LSHProjectionType values[] = {LSHProjectionType_UNKNOWN, LSHProjectionType_SPARSE, + LSHProjectionType_DENSE}; + return values; +} + +inline const char *const *EnumNamesLSHProjectionType() +{ + static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", 
nullptr}; + return names; +} + +inline const char *EnumNameLSHProjectionType(LSHProjectionType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesLSHProjectionType()[index]; +} + +enum FullyConnectedOptionsWeightsFormat +{ + FullyConnectedOptionsWeightsFormat_DEFAULT = 0, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1, + FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 +}; + +inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] +{ + static const FullyConnectedOptionsWeightsFormat values[] = { + FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8}; + return values; +} + +inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat() +{ + static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr}; + return names; +} + +inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesFullyConnectedOptionsWeightsFormat()[index]; +} + +enum LSTMKernelType +{ + LSTMKernelType_FULL = 0, + LSTMKernelType_BASIC = 1, + LSTMKernelType_MIN = LSTMKernelType_FULL, + LSTMKernelType_MAX = LSTMKernelType_BASIC +}; + +inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2] +{ + static const LSTMKernelType values[] = {LSTMKernelType_FULL, LSTMKernelType_BASIC}; + return values; +} + +inline const char *const *EnumNamesLSTMKernelType() +{ + static const char *const names[] = {"FULL", "BASIC", nullptr}; + return names; +} + +inline const char *EnumNameLSTMKernelType(LSTMKernelType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesLSTMKernelType()[index]; +} + +enum CombinerType +{ + CombinerType_SUM = 0, + CombinerType_MEAN = 1, + CombinerType_SQRTN = 2, + CombinerType_MIN = 
CombinerType_SUM, + CombinerType_MAX = CombinerType_SQRTN +}; + +inline const CombinerType (&EnumValuesCombinerType())[3] +{ + static const CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN, CombinerType_SQRTN}; + return values; +} + +inline const char *const *EnumNamesCombinerType() +{ + static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr}; + return names; +} + +inline const char *EnumNameCombinerType(CombinerType e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesCombinerType()[index]; +} + +enum MirrorPadMode +{ + MirrorPadMode_REFLECT = 0, + MirrorPadMode_SYMMETRIC = 1, + MirrorPadMode_MIN = MirrorPadMode_REFLECT, + MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC +}; + +inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2] +{ + static const MirrorPadMode values[] = {MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC}; + return values; +} + +inline const char *const *EnumNamesMirrorPadMode() +{ + static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr}; + return names; +} + +inline const char *EnumNameMirrorPadMode(MirrorPadMode e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesMirrorPadMode()[index]; +} + +enum CustomOptionsFormat +{ + CustomOptionsFormat_FLEXBUFFERS = 0, + CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS, + CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS +}; + +inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] +{ + static const CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS}; + return values; +} + +inline const char *const *EnumNamesCustomOptionsFormat() +{ + static const char *const names[] = {"FLEXBUFFERS", nullptr}; + return names; +} + +inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) +{ + const size_t index = static_cast<int>(e); + return EnumNamesCustomOptionsFormat()[index]; +} + +struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_CUSTOM = 4 + 
}; + const flatbuffers::Vector<uint8_t> *custom() const + { + return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CUSTOM) && + verifier.VerifyVector(custom()) && verifier.EndTable(); + } +}; + +struct CustomQuantizationBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom) + { + fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom); + } + explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &); + flatbuffers::Offset<CustomQuantization> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CustomQuantization>(end); + return o; + } +}; + +inline flatbuffers::Offset<CustomQuantization> +CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0) +{ + CustomQuantizationBuilder builder_(_fbb); + builder_.add_custom(custom); + return builder_.Finish(); +} + +inline flatbuffers::Offset<CustomQuantization> +CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<uint8_t> *custom = nullptr) +{ + return neurun_tflite::CreateCustomQuantization(_fbb, + custom ? 
_fbb.CreateVector<uint8_t>(*custom) : 0); +} + +struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_MIN = 4, + VT_MAX = 6, + VT_SCALE = 8, + VT_ZERO_POINT = 10, + VT_DETAILS_TYPE = 12, + VT_DETAILS = 14 + }; + const flatbuffers::Vector<float> *min() const + { + return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN); + } + const flatbuffers::Vector<float> *max() const + { + return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX); + } + const flatbuffers::Vector<float> *scale() const + { + return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE); + } + const flatbuffers::Vector<int64_t> *zero_point() const + { + return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT); + } + QuantizationDetails details_type() const + { + return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0)); + } + const void *details() const { return GetPointer<const void *>(VT_DETAILS); } + template <typename T> const T *details_as() const; + const CustomQuantization *details_as_CustomQuantization() const + { + return details_type() == QuantizationDetails_CustomQuantization + ? 
static_cast<const CustomQuantization *>(details()) + : nullptr; + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) && + verifier.VerifyVector(min()) && VerifyOffset(verifier, VT_MAX) && + verifier.VerifyVector(max()) && VerifyOffset(verifier, VT_SCALE) && + verifier.VerifyVector(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) && + verifier.VerifyVector(zero_point()) && VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE) && + VerifyOffset(verifier, VT_DETAILS) && + VerifyQuantizationDetails(verifier, details(), details_type()) && verifier.EndTable(); + } +}; + +template <> +inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const +{ + return details_as_CustomQuantization(); +} + +struct QuantizationParametersBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min) + { + fbb_.AddOffset(QuantizationParameters::VT_MIN, min); + } + void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max) + { + fbb_.AddOffset(QuantizationParameters::VT_MAX, max); + } + void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale) + { + fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale); + } + void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point) + { + fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point); + } + void add_details_type(QuantizationDetails details_type) + { + fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE, + static_cast<uint8_t>(details_type), 0); + } + void add_details(flatbuffers::Offset<void> details) + { + fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details); + } + explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &); + 
flatbuffers::Offset<QuantizationParameters> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<QuantizationParameters>(end); + return o; + } +}; + +inline flatbuffers::Offset<QuantizationParameters> +CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<float>> min = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> max = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0, + flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0, + QuantizationDetails details_type = QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0) +{ + QuantizationParametersBuilder builder_(_fbb); + builder_.add_details(details); + builder_.add_zero_point(zero_point); + builder_.add_scale(scale); + builder_.add_max(max); + builder_.add_min(min); + builder_.add_details_type(details_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect( + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, + const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, + const std::vector<int64_t> *zero_point = nullptr, + QuantizationDetails details_type = QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0) +{ + return neurun_tflite::CreateQuantizationParameters( + _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0, + scale ? _fbb.CreateVector<float>(*scale) : 0, + zero_point ? 
_fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details); +} + +struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_SHAPE = 4, + VT_TYPE = 6, + VT_BUFFER = 8, + VT_NAME = 10, + VT_QUANTIZATION = 12, + VT_IS_VARIABLE = 14 + }; + const flatbuffers::Vector<int32_t> *shape() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE); + } + TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); } + uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); } + const flatbuffers::String *name() const + { + return GetPointer<const flatbuffers::String *>(VT_NAME); + } + const QuantizationParameters *quantization() const + { + return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION); + } + bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) && + verifier.VerifyVector(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) && + VerifyField<uint32_t>(verifier, VT_BUFFER) && VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && VerifyOffset(verifier, VT_QUANTIZATION) && + verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) && + verifier.EndTable(); + } +}; + +struct TensorBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape) + { + fbb_.AddOffset(Tensor::VT_SHAPE, shape); + } + void add_type(TensorType type) + { + fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0); + } + void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0); } + void add_name(flatbuffers::Offset<flatbuffers::String> name) + { + fbb_.AddOffset(Tensor::VT_NAME, name); + } + void add_quantization(flatbuffers::Offset<QuantizationParameters> 
quantization) + { + fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization); + } + void add_is_variable(bool is_variable) + { + fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0); + } + explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TensorBuilder &operator=(const TensorBuilder &); + flatbuffers::Offset<Tensor> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Tensor>(end); + return o; + } +}; + +inline flatbuffers::Offset<Tensor> +CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0, + TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, + flatbuffers::Offset<flatbuffers::String> name = 0, + flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false) +{ + TensorBuilder builder_(_fbb); + builder_.add_quantization(quantization); + builder_.add_name(name); + builder_.add_buffer(buffer); + builder_.add_shape(shape); + builder_.add_is_variable(is_variable); + builder_.add_type(type); + return builder_.Finish(); +} + +inline flatbuffers::Offset<Tensor> CreateTensorDirect( + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, + TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, + flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false) +{ + return neurun_tflite::CreateTensor(_fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, + buffer, name ? 
_fbb.CreateString(name) : 0, quantization, + is_variable); +} + +struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FUSED_ACTIVATION_FUNCTION = 10, + VT_DILATION_W_FACTOR = 12, + VT_DILATION_H_FACTOR = 14 + }; + Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } + int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) && + VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable(); + } +}; + +struct Conv2DOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) + { + fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, + 
static_cast<int8_t>(fused_activation_function), 0); + } + void add_dilation_w_factor(int32_t dilation_w_factor) + { + fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) + { + fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &); + flatbuffers::Offset<Conv2DOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Conv2DOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<Conv2DOptions> +CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) +{ + Conv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FILTER_WIDTH = 10, + VT_FILTER_HEIGHT = 12, + VT_FUSED_ACTIVATION_FUNCTION = 14 + }; + Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); } + int32_t filter_height() const { return 
GetField<int32_t>(VT_FILTER_HEIGHT, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && + VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) && + VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct Pool2DOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) + { + fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_filter_width(int32_t filter_width) + { + fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0); + } + void add_filter_height(int32_t filter_height) + { + fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &); + flatbuffers::Offset<Pool2DOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Pool2DOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<Pool2DOptions> 
+CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, + int32_t filter_height = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + Pool2DOptionsBuilder builder_(_fbb); + builder_.add_filter_height(filter_height); + builder_.add_filter_width(filter_width); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_DEPTH_MULTIPLIER = 10, + VT_FUSED_ACTIVATION_FUNCTION = 12, + VT_DILATION_W_FACTOR = 14, + VT_DILATION_H_FACTOR = 16 + }; + Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } + int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && + VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) && + 
VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable(); + } +}; + +struct DepthwiseConv2DOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) + { + fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_depth_multiplier(int32_t depth_multiplier) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_dilation_w_factor(int32_t dilation_w_factor) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) + { + fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &); + flatbuffers::Offset<DepthwiseConv2DOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, + int32_t stride_h = 0, int32_t depth_multiplier = 0, + ActivationFunctionType fused_activation_function = 
ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) +{ + DepthwiseConv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_depth_multiplier(depth_multiplier); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NUM_CHANNELS = 4, + VT_NUM_COLUMNS_PER_CHANNEL = 6, + VT_EMBEDDING_DIM_PER_CHANNEL = 8 + }; + int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); } + const flatbuffers::Vector<int32_t> *num_columns_per_channel() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL); + } + const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) && + VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) && + verifier.VerifyVector(num_columns_per_channel()) && + VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) && + verifier.VerifyVector(embedding_dim_per_channel()) && verifier.EndTable(); + } +}; + +struct ConcatEmbeddingsOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_channels(int32_t num_channels) + { + fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); + } + void add_num_columns_per_channel( + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) + { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); + } + void 
add_embedding_dim_per_channel( + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) + { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, + embedding_dim_per_channel); + } + explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &); + flatbuffers::Offset<ConcatEmbeddingsOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions( + flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) +{ + ConcatEmbeddingsOptionsBuilder builder_(_fbb); + builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); + builder_.add_num_columns_per_channel(num_columns_per_channel); + builder_.add_num_channels(num_channels); + return builder_.Finish(); +} + +inline flatbuffers::Offset<ConcatEmbeddingsOptions> +CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, + const std::vector<int32_t> *num_columns_per_channel = nullptr, + const std::vector<int32_t> *embedding_dim_per_channel = nullptr) +{ + return neurun_tflite::CreateConcatEmbeddingsOptions( + _fbb, num_channels, + num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0, + embedding_dim_per_channel ? 
_fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0); +} + +struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_TYPE = 4 + }; + LSHProjectionType type() const + { + return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_TYPE) && + verifier.EndTable(); + } +}; + +struct LSHProjectionOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_type(LSHProjectionType type) + { + fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0); + } + explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &); + flatbuffers::Offset<LSHProjectionOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LSHProjectionOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LSHProjectionOptions> +CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, + LSHProjectionType type = LSHProjectionType_UNKNOWN) +{ + LSHProjectionOptionsBuilder builder_(_fbb); + builder_.add_type(type); + return builder_.Finish(); +} + +struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_RANK = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RANK) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct 
SVDFOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &); + flatbuffers::Offset<SVDFOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SVDFOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SVDFOptions> +CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + SVDFOptionsBuilder builder_(_fbb); + builder_.add_rank(rank); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct RNNOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + 
start_ = fbb_.StartTable(); + } + RNNOptionsBuilder &operator=(const RNNOptionsBuilder &); + flatbuffers::Offset<RNNOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<RNNOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<RNNOptions> +CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + RNNOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct SequenceRNNOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_time_major(bool time_major) + { + fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), + 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &); + flatbuffers::Offset<SequenceRNNOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o 
= flatbuffers::Offset<SequenceRNNOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + SequenceRNNOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_time_major(time_major); + return builder_.Finish(); +} + +struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6, + VT_MERGE_OUTPUTS = 8 + }; + bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) && verifier.EndTable(); + } +}; + +struct BidirectionalSequenceRNNOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_time_major(bool time_major) + { + fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, + static_cast<uint8_t>(time_major), 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_merge_outputs(bool merge_outputs) + { + fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS, + static_cast<uint8_t>(merge_outputs), 0); + } + 
explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &); + flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + bool merge_outputs = false) +{ + BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); + builder_.add_merge_outputs(merge_outputs); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_time_major(time_major); + return builder_.Finish(); +} + +struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_WEIGHTS_FORMAT = 6 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + FullyConnectedOptionsWeightsFormat weights_format() const + { + return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT) && verifier.EndTable(); + } +}; + +struct FullyConnectedOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, + 
static_cast<int8_t>(fused_activation_function), 0); + } + void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format) + { + fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT, + static_cast<int8_t>(weights_format), 0); + } + explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &); + flatbuffers::Offset<FullyConnectedOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<FullyConnectedOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT) +{ + FullyConnectedOptionsBuilder builder_(_fbb); + builder_.add_weights_format(weights_format); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_BETA = 4 + }; + float beta() const { return GetField<float>(VT_BETA, 0.0f); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_BETA) && + verifier.EndTable(); + } +}; + +struct SoftmaxOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); } + explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &); + flatbuffers::Offset<SoftmaxOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = 
flatbuffers::Offset<SoftmaxOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SoftmaxOptions> +CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f) +{ + SoftmaxOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + return builder_.Finish(); +} + +struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_AXIS = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct ConcatenationOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &); + flatbuffers::Offset<ConcatenationOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ConcatenationOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions( + flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + ConcatenationOptionsBuilder builder_(_fbb); + 
builder_.add_axis(axis); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct AddOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + AddOptionsBuilder &operator=(const AddOptionsBuilder &); + flatbuffers::Offset<AddOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<AddOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<AddOptions> +CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + AddOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + 
VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct MulOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + MulOptionsBuilder &operator=(const MulOptionsBuilder &); + flatbuffers::Offset<MulOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<MulOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<MulOptions> +CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + MulOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct L2NormOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } 
+ L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &); + flatbuffers::Offset<L2NormOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<L2NormOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<L2NormOptions> +CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + L2NormOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_RADIUS = 4, + VT_BIAS = 6, + VT_ALPHA = 8, + VT_BETA = 10 + }; + int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); } + float bias() const { return GetField<float>(VT_BIAS, 0.0f); } + float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); } + float beta() const { return GetField<float>(VT_BETA, 0.0f); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RADIUS) && + VerifyField<float>(verifier, VT_BIAS) && VerifyField<float>(verifier, VT_ALPHA) && + VerifyField<float>(verifier, VT_BETA) && verifier.EndTable(); + } +}; + +struct LocalResponseNormalizationOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_radius(int32_t radius) + { + fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0); + } + void add_bias(float bias) + { + fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f); + } + void add_alpha(float alpha) + { + fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f); + } + void add_beta(float beta) + { + fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); + } + explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) 
+ : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LocalResponseNormalizationOptionsBuilder & + operator=(const LocalResponseNormalizationOptionsBuilder &); + flatbuffers::Offset<LocalResponseNormalizationOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LocalResponseNormalizationOptions> +CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t radius = 0, + float bias = 0.0f, float alpha = 0.0f, float beta = 0.0f) +{ + LocalResponseNormalizationOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + builder_.add_alpha(alpha); + builder_.add_bias(bias); + builder_.add_radius(radius); + return builder_.Finish(); +} + +struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_KERNEL_TYPE = 10 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } + float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } + LSTMKernelType kernel_type() const + { + return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<float>(verifier, VT_CELL_CLIP) && + VerifyField<float>(verifier, VT_PROJ_CLIP) && + VerifyField<int8_t>(verifier, VT_KERNEL_TYPE) && verifier.EndTable(); + } +}; + +struct LSTMOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + 
fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) + { + fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) + { + fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_kernel_type(LSTMKernelType kernel_type) + { + fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0); + } + explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &); + flatbuffers::Offset<LSTMOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LSTMOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LSTMOptions> +CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, + LSTMKernelType kernel_type = LSTMKernelType_FULL) +{ + LSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_kernel_type(kernel_type); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_TIME_MAJOR = 10 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } + float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } + bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; } + bool 
Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<float>(verifier, VT_CELL_CLIP) && + VerifyField<float>(verifier, VT_PROJ_CLIP) && + VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) && verifier.EndTable(); + } +}; + +struct UnidirectionalSequenceLSTMOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) + { + fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) + { + fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_time_major(bool time_major) + { + fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, + static_cast<uint8_t>(time_major), 0); + } + explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + UnidirectionalSequenceLSTMOptionsBuilder & + operator=(const UnidirectionalSequenceLSTMOptionsBuilder &); + flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> +CreateUnidirectionalSequenceLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false) +{ + UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + 
builder_.add_cell_clip(cell_clip); + builder_.add_time_major(time_major); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_MERGE_OUTPUTS = 10 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } + float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } + bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<float>(verifier, VT_CELL_CLIP) && + VerifyField<float>(verifier, VT_PROJ_CLIP) && + VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) && verifier.EndTable(); + } +}; + +struct BidirectionalSequenceLSTMOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) + { + fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) + { + fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_merge_outputs(bool merge_outputs) + { + fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, + static_cast<uint8_t>(merge_outputs), 0); + } + explicit 
BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + BidirectionalSequenceLSTMOptionsBuilder & + operator=(const BidirectionalSequenceLSTMOptionsBuilder &); + flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false) +{ + BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_merge_outputs(merge_outputs); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_ALIGN_CORNERS = 8 + }; + bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) && + verifier.EndTable(); + } +}; + +struct ResizeBilinearOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_align_corners(bool align_corners) + { + fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS, + static_cast<uint8_t>(align_corners), 0); + } + explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &); + flatbuffers::Offset<ResizeBilinearOptions> Finish() + { + const auto end = 
fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ResizeBilinearOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ResizeBilinearOptions> +CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false) +{ + ResizeBilinearOptionsBuilder builder_(_fbb); + builder_.add_align_corners(align_corners); + return builder_.Finish(); +} + +struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_ALIGN_CORNERS = 4 + }; + bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) && + verifier.EndTable(); + } +}; + +struct ResizeNearestNeighborOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_align_corners(bool align_corners) + { + fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS, + static_cast<uint8_t>(align_corners), 0); + } + explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &); + flatbuffers::Offset<ResizeNearestNeighborOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ResizeNearestNeighborOptions> +CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false) +{ + ResizeNearestNeighborOptionsBuilder builder_(_fbb); + builder_.add_align_corners(align_corners); + return builder_.Finish(); +} + +struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_SUBGRAPH = 4 + }; + uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); } + bool 
Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_SUBGRAPH) &&
           verifier.EndTable();
  }
};

// NOTE(review): flatc-generated code — regenerate from the .fbs schema instead of hand-editing.
// Builder for the CallOptions table: stages the single `subgraph` field into the
// FlatBufferBuilder between StartTable()/EndTable().
struct CallOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_;
  flatbuffers::uoffset_t start_; // table start offset recorded by StartTable()
  void add_subgraph(uint32_t subgraph)
  {
    // Default 0 — field is omitted from the buffer when subgraph == 0.
    fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
  }
  explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Copy assignment declared but intentionally left undefined (generated idiom to forbid copying).
  CallOptionsBuilder &operator=(const CallOptionsBuilder &);
  flatbuffers::Offset<CallOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<CallOptions>(end);
    return o;
  }
};

// Convenience one-shot constructor for a CallOptions table.
inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb,
                                                          uint32_t subgraph = 0)
{
  CallOptionsBuilder builder_(_fbb);
  builder_.add_subgraph(subgraph);
  return builder_.Finish();
}

// PadOptions: field-less table — Verify only checks the vtable/table framing.
struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Builder for the (empty) PadOptions table.
struct PadOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_;
  flatbuffers::uoffset_t start_;
  explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  PadOptionsBuilder &operator=(const PadOptionsBuilder &);
  flatbuffers::Offset<PadOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<PadOptions>(end);
    return o;
  }
};

inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
  PadOptionsBuilder builder_(_fbb);
  return builder_.Finish();
}

// PadV2Options: field-less table, identical framing-only verification.
struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

struct PadV2OptionsBuilder
+{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &); + flatbuffers::Offset<PadV2Options> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<PadV2Options>(end); + return o; + } +}; + +inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb) +{ + PadV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NEW_SHAPE = 4 + }; + const flatbuffers::Vector<int32_t> *new_shape() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) && + verifier.VerifyVector(new_shape()) && verifier.EndTable(); + } +}; + +struct ReshapeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape) + { + fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape); + } + explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &); + flatbuffers::Offset<ReshapeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ReshapeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ReshapeOptions> +CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0) +{ + ReshapeOptionsBuilder builder_(_fbb); + builder_.add_new_shape(new_shape); + return builder_.Finish(); +} + +inline flatbuffers::Offset<ReshapeOptions> 
CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
                           const std::vector<int32_t> *new_shape = nullptr)
{
  // NOTE(review): flatc-generated code — regenerate from the .fbs schema instead of hand-editing.
  // "Direct" variant: serializes the std::vector into an in-buffer vector first;
  // a null pointer produces offset 0 (field omitted).
  return neurun_tflite::CreateReshapeOptions(
      _fbb, new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
}

// SpaceToBatchNDOptions: field-less table — Verify only checks table framing.
struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Builder for the (empty) SpaceToBatchNDOptions table.
struct SpaceToBatchNDOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_;
  flatbuffers::uoffset_t start_; // table start offset recorded by StartTable()
  explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Copy assignment declared but undefined (generated idiom to forbid copying).
  SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
  flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);
    return o;
  }
};

inline flatbuffers::Offset<SpaceToBatchNDOptions>
CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
  SpaceToBatchNDOptionsBuilder builder_(_fbb);
  return builder_.Finish();
}

// BatchToSpaceNDOptions: field-less table — Verify only checks table framing.
struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

struct BatchToSpaceNDOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_;
  flatbuffers::uoffset_t start_;
  explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
  flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
    return o;
  }
};

inline flatbuffers::Offset<BatchToSpaceNDOptions>
CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
BatchToSpaceNDOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NGRAM_SIZE = 4, + VT_MAX_SKIP_SIZE = 6, + VT_INCLUDE_ALL_NGRAMS = 8 + }; + int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); } + int32_t max_skip_size() const { return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0); } + bool include_all_ngrams() const { return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) && + VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) && + VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) && verifier.EndTable(); + } +}; + +struct SkipGramOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_ngram_size(int32_t ngram_size) + { + fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0); + } + void add_max_skip_size(int32_t max_skip_size) + { + fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0); + } + void add_include_all_ngrams(bool include_all_ngrams) + { + fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, + static_cast<uint8_t>(include_all_ngrams), 0); + } + explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &); + flatbuffers::Offset<SkipGramOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SkipGramOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SkipGramOptions> +CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0, + int32_t max_skip_size = 0, bool include_all_ngrams = false) +{ + SkipGramOptionsBuilder builder_(_fbb); + builder_.add_max_skip_size(max_skip_size); + 
builder_.add_ngram_size(ngram_size);
  builder_.add_include_all_ngrams(include_all_ngrams);
  return builder_.Finish();
}

// NOTE(review): flatc-generated code — regenerate from the .fbs schema instead of hand-editing.
// SpaceToDepthOptions table: single int32 `block_size` field (vtable slot 4).
struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  enum
  {
    VT_BLOCK_SIZE = 4 // vtable offset of block_size
  };
  // Reads block_size from the buffer; 0 when the field was omitted.
  int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
           verifier.EndTable();
  }
};

// Builder for SpaceToDepthOptions.
struct SpaceToDepthOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_;
  flatbuffers::uoffset_t start_; // table start offset recorded by StartTable()
  void add_block_size(int32_t block_size)
  {
    // Default 0 — field is omitted from the buffer when block_size == 0.
    fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
  }
  explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Copy assignment declared but undefined (generated idiom to forbid copying).
  SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
  flatbuffers::Offset<SpaceToDepthOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);
    return o;
  }
};

inline flatbuffers::Offset<SpaceToDepthOptions>
CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
{
  SpaceToDepthOptionsBuilder builder_(_fbb);
  builder_.add_block_size(block_size);
  return builder_.Finish();
}

// SubOptions table: single fused_activation_function enum field stored as int8.
struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  enum
  {
    VT_FUSED_ACTIVATION_FUNCTION = 4
  };
  ActivationFunctionType fused_activation_function() const
  {
    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
  }
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) &&
           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
  }
};

struct SubOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_;
  flatbuffers::uoffset_t start_;
  void
add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SubOptionsBuilder &operator=(const SubOptionsBuilder &); + flatbuffers::Offset<SubOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SubOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SubOptions> +CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + SubOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const + { + return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + } +}; + +struct DivOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + DivOptionsBuilder &operator=(const DivOptionsBuilder &); + flatbuffers::Offset<DivOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<DivOptions>(end); + return o; + } 
+}; + +inline flatbuffers::Offset<DivOptions> +CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +{ + DivOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct TopKV2OptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &); + flatbuffers::Offset<TopKV2Options> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TopKV2Options>(end); + return o; + } +}; + +inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb) +{ + TopKV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_COMBINER = 4 + }; + CombinerType combiner() const + { + return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_COMBINER) && + verifier.EndTable(); + } +}; + +struct EmbeddingLookupSparseOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_combiner(CombinerType combiner) + { + fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER, + static_cast<int8_t>(combiner), 0); + } + explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + 
EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &); + flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<EmbeddingLookupSparseOptions> +CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, + CombinerType combiner = CombinerType_SUM) +{ + EmbeddingLookupSparseOptionsBuilder builder_(_fbb); + builder_.add_combiner(combiner); + return builder_.Finish(); +} + +struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_AXIS = 4 + }; + int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) && + verifier.EndTable(); + } +}; + +struct GatherOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); } + explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + GatherOptionsBuilder &operator=(const GatherOptionsBuilder &); + flatbuffers::Offset<GatherOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<GatherOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0) +{ + GatherOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + return builder_.Finish(); +} + +struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct TransposeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + 
flatbuffers::uoffset_t start_; + explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &); + flatbuffers::Offset<TransposeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TransposeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<TransposeOptions> +CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + TransposeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ExpOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ExpOptionsBuilder &operator=(const ExpOptionsBuilder &); + flatbuffers::Offset<ExpOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ExpOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + ExpOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_KEEP_DIMS = 4 + }; + bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) && + verifier.EndTable(); + } +}; + +struct ReducerOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_keep_dims(bool keep_dims) + { + fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 
0); + } + explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &); + flatbuffers::Offset<ReducerOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ReducerOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ReducerOptions> +CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false) +{ + ReducerOptionsBuilder builder_(_fbb); + builder_.add_keep_dims(keep_dims); + return builder_.Finish(); +} + +struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_SQUEEZE_DIMS = 4 + }; + const flatbuffers::Vector<int32_t> *squeeze_dims() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SQUEEZE_DIMS) && + verifier.VerifyVector(squeeze_dims()) && verifier.EndTable(); + } +}; + +struct SqueezeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims) + { + fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims); + } + explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &); + flatbuffers::Offset<SqueezeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SqueezeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SqueezeOptions> +CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0) +{ + SqueezeOptionsBuilder builder_(_fbb); + builder_.add_squeeze_dims(squeeze_dims); + return builder_.Finish(); +} + 
+inline flatbuffers::Offset<SqueezeOptions> +CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<int32_t> *squeeze_dims = nullptr) +{ + return neurun_tflite::CreateSqueezeOptions( + _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0); +} + +struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) && + verifier.EndTable(); + } +}; + +struct SplitOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) + { + fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SplitOptionsBuilder &operator=(const SplitOptionsBuilder &); + flatbuffers::Offset<SplitOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SplitOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) +{ + SplitOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) && + verifier.EndTable(); + } +}; + +struct SplitVOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) + { + 
fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &); + flatbuffers::Offset<SplitVOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SplitVOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) +{ + SplitVOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_BEGIN_MASK = 4, + VT_END_MASK = 6, + VT_ELLIPSIS_MASK = 8, + VT_NEW_AXIS_MASK = 10, + VT_SHRINK_AXIS_MASK = 12 + }; + int32_t begin_mask() const { return GetField<int32_t>(VT_BEGIN_MASK, 0); } + int32_t end_mask() const { return GetField<int32_t>(VT_END_MASK, 0); } + int32_t ellipsis_mask() const { return GetField<int32_t>(VT_ELLIPSIS_MASK, 0); } + int32_t new_axis_mask() const { return GetField<int32_t>(VT_NEW_AXIS_MASK, 0); } + int32_t shrink_axis_mask() const { return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BEGIN_MASK) && + VerifyField<int32_t>(verifier, VT_END_MASK) && + VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK) && + VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK) && + VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK) && verifier.EndTable(); + } +}; + +struct StridedSliceOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_begin_mask(int32_t begin_mask) + { + fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0); + } + void add_end_mask(int32_t end_mask) + { + 
fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0); + } + void add_ellipsis_mask(int32_t ellipsis_mask) + { + fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0); + } + void add_new_axis_mask(int32_t new_axis_mask) + { + fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0); + } + void add_shrink_axis_mask(int32_t shrink_axis_mask) + { + fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0); + } + explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &); + flatbuffers::Offset<StridedSliceOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<StridedSliceOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<StridedSliceOptions> +CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_mask = 0, + int32_t end_mask = 0, int32_t ellipsis_mask = 0, + int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0) +{ + StridedSliceOptionsBuilder builder_(_fbb); + builder_.add_shrink_axis_mask(shrink_axis_mask); + builder_.add_new_axis_mask(new_axis_mask); + builder_.add_ellipsis_mask(ellipsis_mask); + builder_.add_end_mask(end_mask); + builder_.add_begin_mask(begin_mask); + return builder_.Finish(); +} + +struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LogSoftmaxOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &); + flatbuffers::Offset<LogSoftmaxOptions> 
Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LogSoftmaxOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LogSoftmaxOptions> +CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LogSoftmaxOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_IN_DATA_TYPE = 4, + VT_OUT_DATA_TYPE = 6 + }; + TensorType in_data_type() const + { + return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0)); + } + TensorType out_data_type() const + { + return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE) && + VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE) && verifier.EndTable(); + } +}; + +struct CastOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_in_data_type(TensorType in_data_type) + { + fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0); + } + void add_out_data_type(TensorType out_data_type) + { + fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0); + } + explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + CastOptionsBuilder &operator=(const CastOptionsBuilder &); + flatbuffers::Offset<CastOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CastOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<CastOptions> +CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, + TensorType in_data_type = TensorType_FLOAT32, + TensorType out_data_type = TensorType_FLOAT32) +{ + CastOptionsBuilder builder_(_fbb); + builder_.add_out_data_type(out_data_type); + builder_.add_in_data_type(in_data_type); + 
return builder_.Finish(); +} + +struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct DequantizeOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &); + flatbuffers::Offset<DequantizeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<DequantizeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<DequantizeOptions> +CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + DequantizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct MaximumMinimumOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &); + flatbuffers::Offset<MaximumMinimumOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<MaximumMinimumOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<MaximumMinimumOptions> +CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + MaximumMinimumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) 
&& verifier.EndTable(); + } +}; + +struct TileOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TileOptionsBuilder &operator=(const TileOptionsBuilder &); + flatbuffers::Offset<TileOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TileOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + TileOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_OUTPUT_TYPE = 4 + }; + TensorType output_type() const + { + return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) && + verifier.EndTable(); + } +}; + +struct ArgMaxOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_output_type(TensorType output_type) + { + fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0); + } + explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &); + flatbuffers::Offset<ArgMaxOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ArgMaxOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ArgMaxOptions> +CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, + TensorType output_type = TensorType_FLOAT32) +{ + ArgMaxOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private 
flatbuffers::Table +{ + enum + { + VT_OUTPUT_TYPE = 4 + }; + TensorType output_type() const + { + return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) && + verifier.EndTable(); + } +}; + +struct ArgMinOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_output_type(TensorType output_type) + { + fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0); + } + explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &); + flatbuffers::Offset<ArgMinOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ArgMinOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ArgMinOptions> +CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, + TensorType output_type = TensorType_FLOAT32) +{ + ArgMinOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct GreaterOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &); + flatbuffers::Offset<GreaterOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<GreaterOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<GreaterOptions> +CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + 
GreaterOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct GreaterEqualOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &); + flatbuffers::Offset<GreaterEqualOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<GreaterEqualOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<GreaterEqualOptions> +CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + GreaterEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LessOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LessOptionsBuilder &operator=(const LessOptionsBuilder &); + flatbuffers::Offset<LessOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LessOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LessOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + 
+struct LessEqualOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &); + flatbuffers::Offset<LessEqualOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<LessEqualOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<LessEqualOptions> +CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + LessEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct NegOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + NegOptionsBuilder &operator=(const NegOptionsBuilder &); + flatbuffers::Offset<NegOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<NegOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + NegOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct SelectOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SelectOptionsBuilder &operator=(const SelectOptionsBuilder &); + 
flatbuffers::Offset<SelectOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SelectOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + SelectOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct SliceOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SliceOptionsBuilder &operator=(const SliceOptionsBuilder &); + flatbuffers::Offset<SliceOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SliceOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + SliceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8 + }; + Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && verifier.EndTable(); + } +}; + +struct TransposeConvOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) + { + 
fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0); + } + explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &); + flatbuffers::Offset<TransposeConvOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TransposeConvOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<TransposeConvOptions> +CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0) +{ + TransposeConvOptionsBuilder builder_(_fbb); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ExpandDimsOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &); + flatbuffers::Offset<ExpandDimsOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ExpandDimsOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ExpandDimsOptions> +CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + ExpandDimsOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct 
SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_VALIDATE_INDICES = 4 + }; + bool validate_indices() const { return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES) && + verifier.EndTable(); + } +}; + +struct SparseToDenseOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_validate_indices(bool validate_indices) + { + fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES, + static_cast<uint8_t>(validate_indices), 0); + } + explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &); + flatbuffers::Offset<SparseToDenseOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SparseToDenseOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SparseToDenseOptions> +CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_indices = false) +{ + SparseToDenseOptionsBuilder builder_(_fbb); + builder_.add_validate_indices(validate_indices); + return builder_.Finish(); +} + +struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct EqualOptionsBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + EqualOptionsBuilder &operator=(const EqualOptionsBuilder &); + flatbuffers::Offset<EqualOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<EqualOptions>(end); + return o; + } 
+};
+
+// NOTE(review): FlatBuffers-compiler (flatc) generated serialization code for the TFLite
+// schema (pattern matches flatc output; namespace neurun_tflite per CreateOperatorCodeDirect
+// below). Each schema table T gets three artifacts: a read-only accessor `struct T` that
+// views fields inside a raw buffer, a `TBuilder` that appends fields to a
+// flatbuffers::FlatBufferBuilder, and an inline `CreateT()` convenience wrapper.
+// Do not hand-edit: regenerate from the .fbs schema instead — TODO confirm schema location.
+inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  EqualOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// NotEqualOptions: empty options table (no fields); Verify only checks table framing.
+struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct NotEqualOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Copy-assignment declared but not defined: generated idiom to suppress assignment.
+  NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
+  flatbuffers::Offset<NotEqualOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<NotEqualOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<NotEqualOptions>
+CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  NotEqualOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// ShapeOptions: one field, out_type (TensorType enum stored as int8 at vtable slot 4;
+// stored default is 0, which the Create helper exposes as TensorType_FLOAT32).
+struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_OUT_TYPE = 4
+  };
+  TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
+           verifier.EndTable();
+  }
+};
+
+struct ShapeOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_out_type(TensorType out_type)
+  {
+    fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
+  }
+  explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
+  flatbuffers::Offset<ShapeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ShapeOptions>(end);
+    return 
o;
+  }
+};
+
+inline flatbuffers::Offset<ShapeOptions>
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
+{
+  ShapeOptionsBuilder builder_(_fbb);
+  builder_.add_out_type(out_type);
+  return builder_.Finish();
+}
+
+// PowOptions: empty options table.
+struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct PowOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  PowOptionsBuilder &operator=(const PowOptionsBuilder &);
+  flatbuffers::Offset<PowOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<PowOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  PowOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// FakeQuantOptions: quantization range [min, max] (float), bit width num_bits (int32),
+// and narrow_range flag (stored as uint8; accessor maps nonzero -> true).
+struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_MIN = 4,
+    VT_MAX = 6,
+    VT_NUM_BITS = 8,
+    VT_NARROW_RANGE = 10
+  };
+  float min() const { return GetField<float>(VT_MIN, 0.0f); }
+  float max() const { return GetField<float>(VT_MAX, 0.0f); }
+  int32_t num_bits() const { return GetField<int32_t>(VT_NUM_BITS, 0); }
+  bool narrow_range() const { return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0; }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_MIN) &&
+           VerifyField<float>(verifier, VT_MAX) && VerifyField<int32_t>(verifier, VT_NUM_BITS) &&
+           VerifyField<uint8_t>(verifier, VT_NARROW_RANGE) && verifier.EndTable();
+  }
+};
+
+struct FakeQuantOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_min(float min) { 
fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
+  void add_max(float max) { fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f); }
+  void add_num_bits(int32_t num_bits)
+  {
+    fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0);
+  }
+  void add_narrow_range(bool narrow_range)
+  {
+    fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range),
+                             0);
+  }
+  explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
+  flatbuffers::Offset<FakeQuantOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FakeQuantOptions>(end);
+    return o;
+  }
+};
+
+// Fields are added in descending size order (int32/float before the uint8 flag),
+// which is the flatc layout convention for minimizing padding.
+inline flatbuffers::Offset<FakeQuantOptions>
+CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, float max = 0.0f,
+                       int32_t num_bits = 0, bool narrow_range = false)
+{
+  FakeQuantOptionsBuilder builder_(_fbb);
+  builder_.add_num_bits(num_bits);
+  builder_.add_max(max);
+  builder_.add_min(min);
+  builder_.add_narrow_range(narrow_range);
+  return builder_.Finish();
+}
+
+// PackOptions: values_count (number of packed inputs) and axis, both int32.
+struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_VALUES_COUNT = 4,
+    VT_AXIS = 6
+  };
+  int32_t values_count() const { return GetField<int32_t>(VT_VALUES_COUNT, 0); }
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_VALUES_COUNT) &&
+           VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+  }
+};
+
+struct PackOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_values_count(int32_t values_count)
+  {
+    fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0);
+  }
+  void add_axis(int32_t axis) {
fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0); }
+  explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  PackOptionsBuilder &operator=(const PackOptionsBuilder &);
+  flatbuffers::Offset<PackOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<PackOptions>(end);
+    return o;
+  }
+};
+
+// NOTE(review): flatc-generated FlatBuffers code (table accessor / Builder / Create helper
+// triple per schema table). Regenerate from the schema rather than editing by hand.
+inline flatbuffers::Offset<PackOptions>
+CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0, int32_t axis = 0)
+{
+  PackOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  builder_.add_values_count(values_count);
+  return builder_.Finish();
+}
+
+// LogicalOrOptions: empty options table (no fields).
+struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct LogicalOrOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
+  flatbuffers::Offset<LogicalOrOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LogicalOrOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LogicalOrOptions>
+CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  LogicalOrOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// OneHotOptions: single int32 axis field (default 0).
+struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_AXIS = 4
+  };
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+           verifier.EndTable();
+  }
+};
+
+struct OneHotOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t 
start_;
+  void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
+  explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
+  flatbuffers::Offset<OneHotOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<OneHotOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                              int32_t axis = 0)
+{
+  OneHotOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  return builder_.Finish();
+}
+
+// AbsOptions: empty options table.
+struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct AbsOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
+  flatbuffers::Offset<AbsOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AbsOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  AbsOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// LogicalAndOptions: empty options table.
+struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct LogicalAndOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
+  flatbuffers::Offset<LogicalAndOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LogicalAndOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LogicalAndOptions>
+CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  LogicalAndOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// LogicalNotOptions: empty options table.
+struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct LogicalNotOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
+  flatbuffers::Offset<LogicalNotOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LogicalNotOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LogicalNotOptions>
+CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  LogicalNotOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// UnpackOptions: num (number of outputs) and axis, both int32.
+struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_NUM = 4,
+    VT_AXIS = 6
+  };
+  int32_t num() const { return GetField<int32_t>(VT_NUM, 0); }
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM) &&
+           VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+  }
+};
+
+struct UnpackOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
+  void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0); }
+
explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
+  flatbuffers::Offset<UnpackOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UnpackOptions>(end);
+    return o;
+  }
+};
+
+// NOTE(review): flatc-generated FlatBuffers code (table accessor / Builder / Create helper
+// triple per schema table). Regenerate from the schema rather than editing by hand.
+inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                              int32_t num = 0, int32_t axis = 0)
+{
+  UnpackOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  builder_.add_num(num);
+  return builder_.Finish();
+}
+
+// FloorDivOptions: empty options table.
+struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct FloorDivOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
+  flatbuffers::Offset<FloorDivOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FloorDivOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FloorDivOptions>
+CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  FloorDivOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// SquareOptions: empty options table.
+struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct SquareOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
+
flatbuffers::Offset<SquareOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SquareOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  SquareOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// ZerosLikeOptions: empty options table.
+struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct ZerosLikeOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
+  flatbuffers::Offset<ZerosLikeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ZerosLikeOptions>
+CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  ZerosLikeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// FillOptions: empty options table.
+struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct FillOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  FillOptionsBuilder &operator=(const FillOptionsBuilder &);
+  flatbuffers::Offset<FillOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FillOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  FillOptionsBuilder builder_(_fbb);
+  
return builder_.Finish();
+}
+
+// FloorModOptions: empty options table.
+struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct FloorModOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
+  flatbuffers::Offset<FloorModOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FloorModOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FloorModOptions>
+CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  FloorModOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// RangeOptions: empty options table.
+struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct RangeOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
+  flatbuffers::Offset<RangeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RangeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  RangeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// LeakyReluOptions: single float alpha (negative-slope coefficient; default 0.0f).
+struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_ALPHA = 4
+  };
+  float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
VerifyField<float>(verifier, VT_ALPHA) &&
+           verifier.EndTable();
+  }
+};
+
+// NOTE(review): flatc-generated FlatBuffers code (table accessor / Builder / Create helper
+// triple per schema table). Regenerate from the schema rather than editing by hand.
+struct LeakyReluOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
+  explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
+  flatbuffers::Offset<LeakyReluOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LeakyReluOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LeakyReluOptions>
+CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
+{
+  LeakyReluOptionsBuilder builder_(_fbb);
+  builder_.add_alpha(alpha);
+  return builder_.Finish();
+}
+
+// SquaredDifferenceOptions: empty options table.
+struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct SquaredDifferenceOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
+  flatbuffers::Offset<SquaredDifferenceOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SquaredDifferenceOptions>
+CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  SquaredDifferenceOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// MirrorPadOptions: one field, mode (MirrorPadMode enum stored as int8; stored default 0,
+// exposed as MirrorPadMode_REFLECT by the Create helper).
+struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_MODE = 4
+  };
+  MirrorPadMode mode() const { return 
static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
+           verifier.EndTable();
+  }
+};
+
+struct MirrorPadOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_mode(MirrorPadMode mode)
+  {
+    fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
+  }
+  explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
+  flatbuffers::Offset<MirrorPadOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MirrorPadOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<MirrorPadOptions>
+CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                       MirrorPadMode mode = MirrorPadMode_REFLECT)
+{
+  MirrorPadOptionsBuilder builder_(_fbb);
+  builder_.add_mode(mode);
+  return builder_.Finish();
+}
+
+// OperatorCode: identifies an operator implementation — builtin_code (BuiltinOperator enum,
+// int8), optional custom_code string (used for custom/non-builtin ops), and an op version
+// whose stored default is 1. Verify also bounds-checks the custom_code string offset.
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_BUILTIN_CODE = 4,
+    VT_CUSTOM_CODE = 6,
+    VT_VERSION = 8
+  };
+  BuiltinOperator builtin_code() const
+  {
+    return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0));
+  }
+  const flatbuffers::String *custom_code() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
+  }
+  int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
+           VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
+           VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+  }
+};
+
+struct OperatorCodeBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void 
add_builtin_code(BuiltinOperator builtin_code)
+  {
+    fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int8_t>(builtin_code), 0);
+  }
+  void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
+  {
+    fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
+  }
+  void add_version(int32_t version)
+  {
+    fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
+  }
+  explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
+  flatbuffers::Offset<OperatorCode> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<OperatorCode>(end);
+    return o;
+  }
+};
+
+// Fields are added in descending size order (int32 version, offset custom_code, int8
+// builtin_code), the flatc layout convention.
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
+                   BuiltinOperator builtin_code = BuiltinOperator_ADD,
+                   flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+{
+  OperatorCodeBuilder builder_(_fbb);
+  builder_.add_version(version);
+  builder_.add_custom_code(custom_code);
+  builder_.add_builtin_code(builtin_code);
+  return builder_.Finish();
+}
+
+// Convenience overload: takes custom_code as a C string and interns it in the builder
+// (null pointer maps to a null offset, i.e. field absent).
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
+                         BuiltinOperator builtin_code = BuiltinOperator_ADD,
+                         const char *custom_code = nullptr, int32_t version = 1)
+{
+  return neurun_tflite::CreateOperatorCode(
+      _fbb, builtin_code, custom_code ?
_fbb.CreateString(custom_code) : 0, version); +} + +struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_OPCODE_INDEX = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_BUILTIN_OPTIONS_TYPE = 10, + VT_BUILTIN_OPTIONS = 12, + VT_CUSTOM_OPTIONS = 14, + VT_CUSTOM_OPTIONS_FORMAT = 16, + VT_MUTATING_VARIABLE_INPUTS = 18 + }; + uint32_t opcode_index() const { return GetField<uint32_t>(VT_OPCODE_INDEX, 0); } + const flatbuffers::Vector<int32_t> *inputs() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS); + } + const flatbuffers::Vector<int32_t> *outputs() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS); + } + BuiltinOptions builtin_options_type() const + { + return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0)); + } + const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); } + template <typename T> const T *builtin_options_as() const; + const Conv2DOptions *builtin_options_as_Conv2DOptions() const + { + return builtin_options_type() == BuiltinOptions_Conv2DOptions + ? static_cast<const Conv2DOptions *>(builtin_options()) + : nullptr; + } + const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const + { + return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions + ? static_cast<const DepthwiseConv2DOptions *>(builtin_options()) + : nullptr; + } + const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const + { + return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions + ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options()) + : nullptr; + } + const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const + { + return builtin_options_type() == BuiltinOptions_LSHProjectionOptions + ? 
static_cast<const LSHProjectionOptions *>(builtin_options()) + : nullptr; + } + const Pool2DOptions *builtin_options_as_Pool2DOptions() const + { + return builtin_options_type() == BuiltinOptions_Pool2DOptions + ? static_cast<const Pool2DOptions *>(builtin_options()) + : nullptr; + } + const SVDFOptions *builtin_options_as_SVDFOptions() const + { + return builtin_options_type() == BuiltinOptions_SVDFOptions + ? static_cast<const SVDFOptions *>(builtin_options()) + : nullptr; + } + const RNNOptions *builtin_options_as_RNNOptions() const + { + return builtin_options_type() == BuiltinOptions_RNNOptions + ? static_cast<const RNNOptions *>(builtin_options()) + : nullptr; + } + const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const + { + return builtin_options_type() == BuiltinOptions_FullyConnectedOptions + ? static_cast<const FullyConnectedOptions *>(builtin_options()) + : nullptr; + } + const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const + { + return builtin_options_type() == BuiltinOptions_SoftmaxOptions + ? static_cast<const SoftmaxOptions *>(builtin_options()) + : nullptr; + } + const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const + { + return builtin_options_type() == BuiltinOptions_ConcatenationOptions + ? static_cast<const ConcatenationOptions *>(builtin_options()) + : nullptr; + } + const AddOptions *builtin_options_as_AddOptions() const + { + return builtin_options_type() == BuiltinOptions_AddOptions + ? static_cast<const AddOptions *>(builtin_options()) + : nullptr; + } + const L2NormOptions *builtin_options_as_L2NormOptions() const + { + return builtin_options_type() == BuiltinOptions_L2NormOptions + ? static_cast<const L2NormOptions *>(builtin_options()) + : nullptr; + } + const LocalResponseNormalizationOptions * + builtin_options_as_LocalResponseNormalizationOptions() const + { + return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions + ? 
static_cast<const LocalResponseNormalizationOptions *>(builtin_options()) + : nullptr; + } + const LSTMOptions *builtin_options_as_LSTMOptions() const + { + return builtin_options_type() == BuiltinOptions_LSTMOptions + ? static_cast<const LSTMOptions *>(builtin_options()) + : nullptr; + } + const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const + { + return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions + ? static_cast<const ResizeBilinearOptions *>(builtin_options()) + : nullptr; + } + const CallOptions *builtin_options_as_CallOptions() const + { + return builtin_options_type() == BuiltinOptions_CallOptions + ? static_cast<const CallOptions *>(builtin_options()) + : nullptr; + } + const ReshapeOptions *builtin_options_as_ReshapeOptions() const + { + return builtin_options_type() == BuiltinOptions_ReshapeOptions + ? static_cast<const ReshapeOptions *>(builtin_options()) + : nullptr; + } + const SkipGramOptions *builtin_options_as_SkipGramOptions() const + { + return builtin_options_type() == BuiltinOptions_SkipGramOptions + ? static_cast<const SkipGramOptions *>(builtin_options()) + : nullptr; + } + const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const + { + return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions + ? static_cast<const SpaceToDepthOptions *>(builtin_options()) + : nullptr; + } + const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const + { + return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions + ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options()) + : nullptr; + } + const MulOptions *builtin_options_as_MulOptions() const + { + return builtin_options_type() == BuiltinOptions_MulOptions + ? static_cast<const MulOptions *>(builtin_options()) + : nullptr; + } + const PadOptions *builtin_options_as_PadOptions() const + { + return builtin_options_type() == BuiltinOptions_PadOptions + ? 
static_cast<const PadOptions *>(builtin_options()) + : nullptr; + } + const GatherOptions *builtin_options_as_GatherOptions() const + { + return builtin_options_type() == BuiltinOptions_GatherOptions + ? static_cast<const GatherOptions *>(builtin_options()) + : nullptr; + } + const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const + { + return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions + ? static_cast<const BatchToSpaceNDOptions *>(builtin_options()) + : nullptr; + } + const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const + { + return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions + ? static_cast<const SpaceToBatchNDOptions *>(builtin_options()) + : nullptr; + } + const TransposeOptions *builtin_options_as_TransposeOptions() const + { + return builtin_options_type() == BuiltinOptions_TransposeOptions + ? static_cast<const TransposeOptions *>(builtin_options()) + : nullptr; + } + const ReducerOptions *builtin_options_as_ReducerOptions() const + { + return builtin_options_type() == BuiltinOptions_ReducerOptions + ? static_cast<const ReducerOptions *>(builtin_options()) + : nullptr; + } + const SubOptions *builtin_options_as_SubOptions() const + { + return builtin_options_type() == BuiltinOptions_SubOptions + ? static_cast<const SubOptions *>(builtin_options()) + : nullptr; + } + const DivOptions *builtin_options_as_DivOptions() const + { + return builtin_options_type() == BuiltinOptions_DivOptions + ? static_cast<const DivOptions *>(builtin_options()) + : nullptr; + } + const SqueezeOptions *builtin_options_as_SqueezeOptions() const + { + return builtin_options_type() == BuiltinOptions_SqueezeOptions + ? static_cast<const SqueezeOptions *>(builtin_options()) + : nullptr; + } + const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const + { + return builtin_options_type() == BuiltinOptions_SequenceRNNOptions + ? 
static_cast<const SequenceRNNOptions *>(builtin_options()) + : nullptr; + } + const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const + { + return builtin_options_type() == BuiltinOptions_StridedSliceOptions + ? static_cast<const StridedSliceOptions *>(builtin_options()) + : nullptr; + } + const ExpOptions *builtin_options_as_ExpOptions() const + { + return builtin_options_type() == BuiltinOptions_ExpOptions + ? static_cast<const ExpOptions *>(builtin_options()) + : nullptr; + } + const TopKV2Options *builtin_options_as_TopKV2Options() const + { + return builtin_options_type() == BuiltinOptions_TopKV2Options + ? static_cast<const TopKV2Options *>(builtin_options()) + : nullptr; + } + const SplitOptions *builtin_options_as_SplitOptions() const + { + return builtin_options_type() == BuiltinOptions_SplitOptions + ? static_cast<const SplitOptions *>(builtin_options()) + : nullptr; + } + const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const + { + return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions + ? static_cast<const LogSoftmaxOptions *>(builtin_options()) + : nullptr; + } + const CastOptions *builtin_options_as_CastOptions() const + { + return builtin_options_type() == BuiltinOptions_CastOptions + ? static_cast<const CastOptions *>(builtin_options()) + : nullptr; + } + const DequantizeOptions *builtin_options_as_DequantizeOptions() const + { + return builtin_options_type() == BuiltinOptions_DequantizeOptions + ? static_cast<const DequantizeOptions *>(builtin_options()) + : nullptr; + } + const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const + { + return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions + ? static_cast<const MaximumMinimumOptions *>(builtin_options()) + : nullptr; + } + const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const + { + return builtin_options_type() == BuiltinOptions_ArgMaxOptions + ? 
static_cast<const ArgMaxOptions *>(builtin_options()) + : nullptr; + } + const LessOptions *builtin_options_as_LessOptions() const + { + return builtin_options_type() == BuiltinOptions_LessOptions + ? static_cast<const LessOptions *>(builtin_options()) + : nullptr; + } + const NegOptions *builtin_options_as_NegOptions() const + { + return builtin_options_type() == BuiltinOptions_NegOptions + ? static_cast<const NegOptions *>(builtin_options()) + : nullptr; + } + const PadV2Options *builtin_options_as_PadV2Options() const + { + return builtin_options_type() == BuiltinOptions_PadV2Options + ? static_cast<const PadV2Options *>(builtin_options()) + : nullptr; + } + const GreaterOptions *builtin_options_as_GreaterOptions() const + { + return builtin_options_type() == BuiltinOptions_GreaterOptions + ? static_cast<const GreaterOptions *>(builtin_options()) + : nullptr; + } + const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const + { + return builtin_options_type() == BuiltinOptions_GreaterEqualOptions + ? static_cast<const GreaterEqualOptions *>(builtin_options()) + : nullptr; + } + const LessEqualOptions *builtin_options_as_LessEqualOptions() const + { + return builtin_options_type() == BuiltinOptions_LessEqualOptions + ? static_cast<const LessEqualOptions *>(builtin_options()) + : nullptr; + } + const SelectOptions *builtin_options_as_SelectOptions() const + { + return builtin_options_type() == BuiltinOptions_SelectOptions + ? static_cast<const SelectOptions *>(builtin_options()) + : nullptr; + } + const SliceOptions *builtin_options_as_SliceOptions() const + { + return builtin_options_type() == BuiltinOptions_SliceOptions + ? static_cast<const SliceOptions *>(builtin_options()) + : nullptr; + } + const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const + { + return builtin_options_type() == BuiltinOptions_TransposeConvOptions + ? 
static_cast<const TransposeConvOptions *>(builtin_options()) + : nullptr; + } + const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const + { + return builtin_options_type() == BuiltinOptions_SparseToDenseOptions + ? static_cast<const SparseToDenseOptions *>(builtin_options()) + : nullptr; + } + const TileOptions *builtin_options_as_TileOptions() const + { + return builtin_options_type() == BuiltinOptions_TileOptions + ? static_cast<const TileOptions *>(builtin_options()) + : nullptr; + } + const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const + { + return builtin_options_type() == BuiltinOptions_ExpandDimsOptions + ? static_cast<const ExpandDimsOptions *>(builtin_options()) + : nullptr; + } + const EqualOptions *builtin_options_as_EqualOptions() const + { + return builtin_options_type() == BuiltinOptions_EqualOptions + ? static_cast<const EqualOptions *>(builtin_options()) + : nullptr; + } + const NotEqualOptions *builtin_options_as_NotEqualOptions() const + { + return builtin_options_type() == BuiltinOptions_NotEqualOptions + ? static_cast<const NotEqualOptions *>(builtin_options()) + : nullptr; + } + const ShapeOptions *builtin_options_as_ShapeOptions() const + { + return builtin_options_type() == BuiltinOptions_ShapeOptions + ? static_cast<const ShapeOptions *>(builtin_options()) + : nullptr; + } + const PowOptions *builtin_options_as_PowOptions() const + { + return builtin_options_type() == BuiltinOptions_PowOptions + ? static_cast<const PowOptions *>(builtin_options()) + : nullptr; + } + const ArgMinOptions *builtin_options_as_ArgMinOptions() const + { + return builtin_options_type() == BuiltinOptions_ArgMinOptions + ? static_cast<const ArgMinOptions *>(builtin_options()) + : nullptr; + } + const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const + { + return builtin_options_type() == BuiltinOptions_FakeQuantOptions + ? 
static_cast<const FakeQuantOptions *>(builtin_options()) + : nullptr; + } + const PackOptions *builtin_options_as_PackOptions() const + { + return builtin_options_type() == BuiltinOptions_PackOptions + ? static_cast<const PackOptions *>(builtin_options()) + : nullptr; + } + const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const + { + return builtin_options_type() == BuiltinOptions_LogicalOrOptions + ? static_cast<const LogicalOrOptions *>(builtin_options()) + : nullptr; + } + const OneHotOptions *builtin_options_as_OneHotOptions() const + { + return builtin_options_type() == BuiltinOptions_OneHotOptions + ? static_cast<const OneHotOptions *>(builtin_options()) + : nullptr; + } + const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const + { + return builtin_options_type() == BuiltinOptions_LogicalAndOptions + ? static_cast<const LogicalAndOptions *>(builtin_options()) + : nullptr; + } + const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const + { + return builtin_options_type() == BuiltinOptions_LogicalNotOptions + ? static_cast<const LogicalNotOptions *>(builtin_options()) + : nullptr; + } + const UnpackOptions *builtin_options_as_UnpackOptions() const + { + return builtin_options_type() == BuiltinOptions_UnpackOptions + ? static_cast<const UnpackOptions *>(builtin_options()) + : nullptr; + } + const FloorDivOptions *builtin_options_as_FloorDivOptions() const + { + return builtin_options_type() == BuiltinOptions_FloorDivOptions + ? static_cast<const FloorDivOptions *>(builtin_options()) + : nullptr; + } + const SquareOptions *builtin_options_as_SquareOptions() const + { + return builtin_options_type() == BuiltinOptions_SquareOptions + ? static_cast<const SquareOptions *>(builtin_options()) + : nullptr; + } + const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const + { + return builtin_options_type() == BuiltinOptions_ZerosLikeOptions + ? 
static_cast<const ZerosLikeOptions *>(builtin_options()) + : nullptr; + } + const FillOptions *builtin_options_as_FillOptions() const + { + return builtin_options_type() == BuiltinOptions_FillOptions + ? static_cast<const FillOptions *>(builtin_options()) + : nullptr; + } + const BidirectionalSequenceLSTMOptions * + builtin_options_as_BidirectionalSequenceLSTMOptions() const + { + return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions + ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; + } + const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const + { + return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions + ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) + : nullptr; + } + const UnidirectionalSequenceLSTMOptions * + builtin_options_as_UnidirectionalSequenceLSTMOptions() const + { + return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions + ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; + } + const FloorModOptions *builtin_options_as_FloorModOptions() const + { + return builtin_options_type() == BuiltinOptions_FloorModOptions + ? static_cast<const FloorModOptions *>(builtin_options()) + : nullptr; + } + const RangeOptions *builtin_options_as_RangeOptions() const + { + return builtin_options_type() == BuiltinOptions_RangeOptions + ? static_cast<const RangeOptions *>(builtin_options()) + : nullptr; + } + const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const + { + return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions + ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options()) + : nullptr; + } + const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const + { + return builtin_options_type() == BuiltinOptions_LeakyReluOptions + ? 
static_cast<const LeakyReluOptions *>(builtin_options()) + : nullptr; + } + const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const + { + return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions + ? static_cast<const SquaredDifferenceOptions *>(builtin_options()) + : nullptr; + } + const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const + { + return builtin_options_type() == BuiltinOptions_MirrorPadOptions + ? static_cast<const MirrorPadOptions *>(builtin_options()) + : nullptr; + } + const AbsOptions *builtin_options_as_AbsOptions() const + { + return builtin_options_type() == BuiltinOptions_AbsOptions + ? static_cast<const AbsOptions *>(builtin_options()) + : nullptr; + } + const SplitVOptions *builtin_options_as_SplitVOptions() const + { + return builtin_options_type() == BuiltinOptions_SplitVOptions + ? static_cast<const SplitVOptions *>(builtin_options()) + : nullptr; + } + const flatbuffers::Vector<uint8_t> *custom_options() const + { + return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS); + } + CustomOptionsFormat custom_options_format() const + { + return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0)); + } + const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const + { + return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) && + VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) && + VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) && + VerifyOffset(verifier, VT_BUILTIN_OPTIONS) && + VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) && + VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && verifier.VerifyVector(custom_options()) && + 
VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) && + VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) && + verifier.VerifyVector(mutating_variable_inputs()) && verifier.EndTable(); + } +}; + +template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const +{ + return builtin_options_as_Conv2DOptions(); +} + +template <> +inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const +{ + return builtin_options_as_DepthwiseConv2DOptions(); +} + +template <> +inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const +{ + return builtin_options_as_ConcatEmbeddingsOptions(); +} + +template <> +inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const +{ + return builtin_options_as_LSHProjectionOptions(); +} + +template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const +{ + return builtin_options_as_Pool2DOptions(); +} + +template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const +{ + return builtin_options_as_SVDFOptions(); +} + +template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const +{ + return builtin_options_as_RNNOptions(); +} + +template <> +inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const +{ + return builtin_options_as_FullyConnectedOptions(); +} + +template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const +{ + return builtin_options_as_SoftmaxOptions(); +} + +template <> +inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const +{ + return builtin_options_as_ConcatenationOptions(); +} + +template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const +{ + return builtin_options_as_AddOptions(); +} + +template <> inline const L2NormOptions 
*Operator::builtin_options_as<L2NormOptions>() const +{ + return builtin_options_as_L2NormOptions(); +} + +template <> +inline const LocalResponseNormalizationOptions * +Operator::builtin_options_as<LocalResponseNormalizationOptions>() const +{ + return builtin_options_as_LocalResponseNormalizationOptions(); +} + +template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const +{ + return builtin_options_as_LSTMOptions(); +} + +template <> +inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const +{ + return builtin_options_as_ResizeBilinearOptions(); +} + +template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const +{ + return builtin_options_as_CallOptions(); +} + +template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const +{ + return builtin_options_as_ReshapeOptions(); +} + +template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const +{ + return builtin_options_as_SkipGramOptions(); +} + +template <> +inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const +{ + return builtin_options_as_SpaceToDepthOptions(); +} + +template <> +inline const EmbeddingLookupSparseOptions * +Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const +{ + return builtin_options_as_EmbeddingLookupSparseOptions(); +} + +template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const +{ + return builtin_options_as_MulOptions(); +} + +template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const +{ + return builtin_options_as_PadOptions(); +} + +template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const +{ + return builtin_options_as_GatherOptions(); +} + +template <> +inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const +{ + return 
builtin_options_as_BatchToSpaceNDOptions(); +} + +template <> +inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const +{ + return builtin_options_as_SpaceToBatchNDOptions(); +} + +template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const +{ + return builtin_options_as_TransposeOptions(); +} + +template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const +{ + return builtin_options_as_ReducerOptions(); +} + +template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const +{ + return builtin_options_as_SubOptions(); +} + +template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const +{ + return builtin_options_as_DivOptions(); +} + +template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const +{ + return builtin_options_as_SqueezeOptions(); +} + +template <> +inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const +{ + return builtin_options_as_SequenceRNNOptions(); +} + +template <> +inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const +{ + return builtin_options_as_StridedSliceOptions(); +} + +template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const +{ + return builtin_options_as_ExpOptions(); +} + +template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const +{ + return builtin_options_as_TopKV2Options(); +} + +template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const +{ + return builtin_options_as_SplitOptions(); +} + +template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const +{ + return builtin_options_as_LogSoftmaxOptions(); +} + +template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const +{ + return builtin_options_as_CastOptions(); +} + 
+template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const +{ + return builtin_options_as_DequantizeOptions(); +} + +template <> +inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const +{ + return builtin_options_as_MaximumMinimumOptions(); +} + +template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const +{ + return builtin_options_as_ArgMaxOptions(); +} + +template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const +{ + return builtin_options_as_LessOptions(); +} + +template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const +{ + return builtin_options_as_NegOptions(); +} + +template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const +{ + return builtin_options_as_PadV2Options(); +} + +template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const +{ + return builtin_options_as_GreaterOptions(); +} + +template <> +inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const +{ + return builtin_options_as_GreaterEqualOptions(); +} + +template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const +{ + return builtin_options_as_LessEqualOptions(); +} + +template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const +{ + return builtin_options_as_SelectOptions(); +} + +template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const +{ + return builtin_options_as_SliceOptions(); +} + +template <> +inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const +{ + return builtin_options_as_TransposeConvOptions(); +} + +template <> +inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const +{ + return builtin_options_as_SparseToDenseOptions(); +} + +template 
<> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const +{ + return builtin_options_as_TileOptions(); +} + +template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const +{ + return builtin_options_as_ExpandDimsOptions(); +} + +template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const +{ + return builtin_options_as_EqualOptions(); +} + +template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const +{ + return builtin_options_as_NotEqualOptions(); +} + +template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const +{ + return builtin_options_as_ShapeOptions(); +} + +template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const +{ + return builtin_options_as_PowOptions(); +} + +template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const +{ + return builtin_options_as_ArgMinOptions(); +} + +template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const +{ + return builtin_options_as_FakeQuantOptions(); +} + +template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const +{ + return builtin_options_as_PackOptions(); +} + +template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const +{ + return builtin_options_as_LogicalOrOptions(); +} + +template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const +{ + return builtin_options_as_OneHotOptions(); +} + +template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const +{ + return builtin_options_as_LogicalAndOptions(); +} + +template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const +{ + return builtin_options_as_LogicalNotOptions(); +} + +template <> inline const UnpackOptions 
*Operator::builtin_options_as<UnpackOptions>() const +{ + return builtin_options_as_UnpackOptions(); +} + +template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const +{ + return builtin_options_as_FloorDivOptions(); +} + +template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const +{ + return builtin_options_as_SquareOptions(); +} + +template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const +{ + return builtin_options_as_ZerosLikeOptions(); +} + +template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const +{ + return builtin_options_as_FillOptions(); +} + +template <> +inline const BidirectionalSequenceLSTMOptions * +Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const +{ + return builtin_options_as_BidirectionalSequenceLSTMOptions(); +} + +template <> +inline const BidirectionalSequenceRNNOptions * +Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const +{ + return builtin_options_as_BidirectionalSequenceRNNOptions(); +} + +template <> +inline const UnidirectionalSequenceLSTMOptions * +Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const +{ + return builtin_options_as_UnidirectionalSequenceLSTMOptions(); +} + +template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const +{ + return builtin_options_as_FloorModOptions(); +} + +template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const +{ + return builtin_options_as_RangeOptions(); +} + +template <> +inline const ResizeNearestNeighborOptions * +Operator::builtin_options_as<ResizeNearestNeighborOptions>() const +{ + return builtin_options_as_ResizeNearestNeighborOptions(); +} + +template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const +{ + return builtin_options_as_LeakyReluOptions(); +} + +template <> +inline const 
SquaredDifferenceOptions * +Operator::builtin_options_as<SquaredDifferenceOptions>() const +{ + return builtin_options_as_SquaredDifferenceOptions(); +} + +template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const +{ + return builtin_options_as_MirrorPadOptions(); +} + +template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const +{ + return builtin_options_as_AbsOptions(); +} + +template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const +{ + return builtin_options_as_SplitVOptions(); +} + +struct OperatorBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_opcode_index(uint32_t opcode_index) + { + fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0); + } + void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) + { + fbb_.AddOffset(Operator::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) + { + fbb_.AddOffset(Operator::VT_OUTPUTS, outputs); + } + void add_builtin_options_type(BuiltinOptions builtin_options_type) + { + fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE, + static_cast<uint8_t>(builtin_options_type), 0); + } + void add_builtin_options(flatbuffers::Offset<void> builtin_options) + { + fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options); + } + void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options) + { + fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options); + } + void add_custom_options_format(CustomOptionsFormat custom_options_format) + { + fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT, + static_cast<int8_t>(custom_options_format), 0); + } + void add_mutating_variable_inputs( + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) + { + fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); + } + explicit 
OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + OperatorBuilder &operator=(const OperatorBuilder &); + flatbuffers::Offset<Operator> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Operator>(end); + return o; + } +}; + +inline flatbuffers::Offset<Operator> +CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + BuiltinOptions builtin_options_type = BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0, + CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0) +{ + OperatorBuilder builder_(_fbb); + builder_.add_mutating_variable_inputs(mutating_variable_inputs); + builder_.add_custom_options(custom_options); + builder_.add_builtin_options(builtin_options); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_opcode_index(opcode_index); + builder_.add_custom_options_format(custom_options_format); + builder_.add_builtin_options_type(builtin_options_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset<Operator> +CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + const std::vector<int32_t> *inputs = nullptr, + const std::vector<int32_t> *outputs = nullptr, + BuiltinOptions builtin_options_type = BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + const std::vector<uint8_t> *custom_options = nullptr, + CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS, + const std::vector<uint8_t> *mutating_variable_inputs = nullptr) +{ + return neurun_tflite::CreateOperator( + _fbb, opcode_index, inputs ? 
_fbb.CreateVector<int32_t>(*inputs) : 0, + outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options, + custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format, + mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0); +} + +struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_TENSORS = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_OPERATORS = 10, + VT_NAME = 12 + }; + const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS); + } + const flatbuffers::Vector<int32_t> *inputs() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS); + } + const flatbuffers::Vector<int32_t> *outputs() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS); + } + const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS); + } + const flatbuffers::String *name() const + { + return GetPointer<const flatbuffers::String *>(VT_NAME); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) && + verifier.VerifyVector(tensors()) && verifier.VerifyVectorOfTables(tensors()) && + VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) && + VerifyOffset(verifier, VT_OPERATORS) && verifier.VerifyVector(operators()) && + verifier.VerifyVectorOfTables(operators()) && VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && verifier.EndTable(); + } +}; + +struct SubGraphBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void 
add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors) + { + fbb_.AddOffset(SubGraph::VT_TENSORS, tensors); + } + void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) + { + fbb_.AddOffset(SubGraph::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) + { + fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); + } + void + add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators) + { + fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); + } + void add_name(flatbuffers::Offset<flatbuffers::String> name) + { + fbb_.AddOffset(SubGraph::VT_NAME, name); + } + explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + SubGraphBuilder &operator=(const SubGraphBuilder &); + flatbuffers::Offset<SubGraph> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SubGraph>(end); + return o; + } +}; + +inline flatbuffers::Offset<SubGraph> CreateSubGraph( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0, + flatbuffers::Offset<flatbuffers::String> name = 0) +{ + SubGraphBuilder builder_(_fbb); + builder_.add_name(name); + builder_.add_operators(operators); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_tensors(tensors); + return builder_.Finish(); +} + +inline flatbuffers::Offset<SubGraph> +CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr, + const std::vector<int32_t> *inputs = nullptr, + const std::vector<int32_t> *outputs = nullptr, + const 
std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, + const char *name = nullptr) +{ + return neurun_tflite::CreateSubGraph( + _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0, + inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, + outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, + operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0, + name ? _fbb.CreateString(name) : 0); +} + +struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_DATA = 4 + }; + const flatbuffers::Vector<uint8_t> *data() const + { + return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && verifier.EndTable(); + } +}; + +struct BufferBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data) + { + fbb_.AddOffset(Buffer::VT_DATA, data); + } + explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + BufferBuilder &operator=(const BufferBuilder &); + flatbuffers::Offset<Buffer> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Buffer>(end); + return o; + } +}; + +inline flatbuffers::Offset<Buffer> +CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0) +{ + BufferBuilder builder_(_fbb); + builder_.add_data(data); + return builder_.Finish(); +} + +inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<uint8_t> *data = nullptr) +{ + return neurun_tflite::CreateBuffer(_fbb, data ? 
_fbb.CreateVector<uint8_t>(*data) : 0); +} + +struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + enum + { + VT_VERSION = 4, + VT_OPERATOR_CODES = 6, + VT_SUBGRAPHS = 8, + VT_DESCRIPTION = 10, + VT_BUFFERS = 12, + VT_METADATA_BUFFER = 14 + }; + uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); } + const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>( + VT_OPERATOR_CODES); + } + const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS); + } + const flatbuffers::String *description() const + { + return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION); + } + const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS); + } + const flatbuffers::Vector<int32_t> *metadata_buffer() const + { + return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) && + VerifyOffset(verifier, VT_OPERATOR_CODES) && verifier.VerifyVector(operator_codes()) && + verifier.VerifyVectorOfTables(operator_codes()) && + VerifyOffset(verifier, VT_SUBGRAPHS) && verifier.VerifyVector(subgraphs()) && + verifier.VerifyVectorOfTables(subgraphs()) && VerifyOffset(verifier, VT_DESCRIPTION) && + verifier.VerifyString(description()) && VerifyOffset(verifier, VT_BUFFERS) && + verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) && + VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) && + verifier.EndTable(); + } +}; + +struct ModelBuilder +{ + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t 
start_; + void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); } + void add_operator_codes( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes) + { + fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); + } + void + add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs) + { + fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); + } + void add_description(flatbuffers::Offset<flatbuffers::String> description) + { + fbb_.AddOffset(Model::VT_DESCRIPTION, description); + } + void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers) + { + fbb_.AddOffset(Model::VT_BUFFERS, buffers); + } + void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer) + { + fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer); + } + explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ModelBuilder &operator=(const ModelBuilder &); + flatbuffers::Offset<Model> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Model>(end); + return o; + } +}; + +inline flatbuffers::Offset<Model> CreateModel( + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0, + flatbuffers::Offset<flatbuffers::String> description = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0) +{ + ModelBuilder builder_(_fbb); + builder_.add_metadata_buffer(metadata_buffer); + builder_.add_buffers(buffers); + builder_.add_description(description); + builder_.add_subgraphs(subgraphs); + builder_.add_operator_codes(operator_codes); + 
builder_.add_version(version); + return builder_.Finish(); +} + +inline flatbuffers::Offset<Model> +CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr, + const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr, + const std::vector<int32_t> *metadata_buffer = nullptr) +{ + return neurun_tflite::CreateModel( + _fbb, version, + operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0, + subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0, + description ? _fbb.CreateString(description) : 0, + buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0, + metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0); +} + +inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, + QuantizationDetails type) +{ + switch (type) + { + case QuantizationDetails_NONE: + { + return true; + } + case QuantizationDetails_CustomQuantization: + { + auto ptr = reinterpret_cast<const CustomQuantization *>(obj); + return verifier.VerifyTable(ptr); + } + default: + return false; + } +} + +inline bool +VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, + const flatbuffers::Vector<flatbuffers::Offset<void>> *values, + const flatbuffers::Vector<uint8_t> *types) +{ + if (!values || !types) + return !values && !types; + if (values->size() != types->size()) + return false; + for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) + { + if (!VerifyQuantizationDetails(verifier, values->Get(i), + types->GetEnum<QuantizationDetails>(i))) + { + return false; + } + } + return true; +} + +inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, + BuiltinOptions type) +{ + switch (type) + { + case BuiltinOptions_NONE: + { + 
return true; + } + case BuiltinOptions_Conv2DOptions: + { + auto ptr = reinterpret_cast<const Conv2DOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DepthwiseConv2DOptions: + { + auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatEmbeddingsOptions: + { + auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSHProjectionOptions: + { + auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Pool2DOptions: + { + auto ptr = reinterpret_cast<const Pool2DOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SVDFOptions: + { + auto ptr = reinterpret_cast<const SVDFOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RNNOptions: + { + auto ptr = reinterpret_cast<const RNNOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FullyConnectedOptions: + { + auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SoftmaxOptions: + { + auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatenationOptions: + { + auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AddOptions: + { + auto ptr = reinterpret_cast<const AddOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_L2NormOptions: + { + auto ptr = reinterpret_cast<const L2NormOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LocalResponseNormalizationOptions: + { + auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSTMOptions: + { + auto ptr = 
reinterpret_cast<const LSTMOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeBilinearOptions: + { + auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOptions: + { + auto ptr = reinterpret_cast<const CallOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReshapeOptions: + { + auto ptr = reinterpret_cast<const ReshapeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SkipGramOptions: + { + auto ptr = reinterpret_cast<const SkipGramOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToDepthOptions: + { + auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: + { + auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MulOptions: + { + auto ptr = reinterpret_cast<const MulOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PadOptions: + { + auto ptr = reinterpret_cast<const PadOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GatherOptions: + { + auto ptr = reinterpret_cast<const GatherOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BatchToSpaceNDOptions: + { + auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToBatchNDOptions: + { + auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TransposeOptions: + { + auto ptr = reinterpret_cast<const TransposeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReducerOptions: + { + auto ptr = reinterpret_cast<const ReducerOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case 
BuiltinOptions_SubOptions: + { + auto ptr = reinterpret_cast<const SubOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DivOptions: + { + auto ptr = reinterpret_cast<const DivOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SqueezeOptions: + { + auto ptr = reinterpret_cast<const SqueezeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SequenceRNNOptions: + { + auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_StridedSliceOptions: + { + auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpOptions: + { + auto ptr = reinterpret_cast<const ExpOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TopKV2Options: + { + auto ptr = reinterpret_cast<const TopKV2Options *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SplitOptions: + { + auto ptr = reinterpret_cast<const SplitOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogSoftmaxOptions: + { + auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CastOptions: + { + auto ptr = reinterpret_cast<const CastOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DequantizeOptions: + { + auto ptr = reinterpret_cast<const DequantizeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MaximumMinimumOptions: + { + auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMaxOptions: + { + auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessOptions: + { + auto ptr = reinterpret_cast<const LessOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case 
BuiltinOptions_NegOptions: + { + auto ptr = reinterpret_cast<const NegOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PadV2Options: + { + auto ptr = reinterpret_cast<const PadV2Options *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GreaterOptions: + { + auto ptr = reinterpret_cast<const GreaterOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GreaterEqualOptions: + { + auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessEqualOptions: + { + auto ptr = reinterpret_cast<const LessEqualOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SelectOptions: + { + auto ptr = reinterpret_cast<const SelectOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SliceOptions: + { + auto ptr = reinterpret_cast<const SliceOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TransposeConvOptions: + { + auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SparseToDenseOptions: + { + auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TileOptions: + { + auto ptr = reinterpret_cast<const TileOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpandDimsOptions: + { + auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EqualOptions: + { + auto ptr = reinterpret_cast<const EqualOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NotEqualOptions: + { + auto ptr = reinterpret_cast<const NotEqualOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ShapeOptions: + { + auto ptr = reinterpret_cast<const ShapeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + 
case BuiltinOptions_PowOptions: + { + auto ptr = reinterpret_cast<const PowOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMinOptions: + { + auto ptr = reinterpret_cast<const ArgMinOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FakeQuantOptions: + { + auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PackOptions: + { + auto ptr = reinterpret_cast<const PackOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalOrOptions: + { + auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_OneHotOptions: + { + auto ptr = reinterpret_cast<const OneHotOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalAndOptions: + { + auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalNotOptions: + { + auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnpackOptions: + { + auto ptr = reinterpret_cast<const UnpackOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorDivOptions: + { + auto ptr = reinterpret_cast<const FloorDivOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SquareOptions: + { + auto ptr = reinterpret_cast<const SquareOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ZerosLikeOptions: + { + auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FillOptions: + { + auto ptr = reinterpret_cast<const FillOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BidirectionalSequenceLSTMOptions: + { + auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj); + return 
verifier.VerifyTable(ptr); + } + case BuiltinOptions_BidirectionalSequenceRNNOptions: + { + auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: + { + auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorModOptions: + { + auto ptr = reinterpret_cast<const FloorModOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RangeOptions: + { + auto ptr = reinterpret_cast<const RangeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeNearestNeighborOptions: + { + auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LeakyReluOptions: + { + auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SquaredDifferenceOptions: + { + auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MirrorPadOptions: + { + auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AbsOptions: + { + auto ptr = reinterpret_cast<const AbsOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SplitVOptions: + { + auto ptr = reinterpret_cast<const SplitVOptions *>(obj); + return verifier.VerifyTable(ptr); + } + default: + return false; + } +} + +inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, + const flatbuffers::Vector<flatbuffers::Offset<void>> *values, + const flatbuffers::Vector<uint8_t> *types) +{ + if (!values || !types) + return !values && !types; + if (values->size() != types->size()) + return false; + for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) + { + if (!VerifyBuiltinOptions(verifier, 
values->Get(i), types->GetEnum<BuiltinOptions>(i))) + { + return false; + } + } + return true; +} + +inline const neurun_tflite::Model *GetModel(const void *buf) +{ + return flatbuffers::GetRoot<neurun_tflite::Model>(buf); +} + +inline const neurun_tflite::Model *GetSizePrefixedModel(const void *buf) +{ + return flatbuffers::GetSizePrefixedRoot<neurun_tflite::Model>(buf); +} + +inline const char *ModelIdentifier() { return "TFL3"; } + +inline bool ModelBufferHasIdentifier(const void *buf) +{ + return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier()); +} + +inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier) +{ + return verifier.VerifyBuffer<neurun_tflite::Model>(ModelIdentifier()); +} + +inline bool VerifySizePrefixedModelBuffer(flatbuffers::Verifier &verifier) +{ + return verifier.VerifySizePrefixedBuffer<neurun_tflite::Model>(ModelIdentifier()); +} + +inline const char *ModelExtension() { return "tflite"; } + +inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset<neurun_tflite::Model> root) +{ + fbb.Finish(root, ModelIdentifier()); +} + +inline void FinishSizePrefixedModelBuffer(flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset<neurun_tflite::Model> root) +{ + fbb.FinishSizePrefixed(root, ModelIdentifier()); +} + +} // namespace neurun_tflite + +#endif // FLATBUFFERS_GENERATED_SCHEMA_NEURUN_TFLITE_H_ diff --git a/runtime/neurun/frontend/tflite/tflite_schema.fbs b/runtime/neurun/frontend/tflite/tflite_schema.fbs new file mode 100644 index 000000000..ede4dfa3a --- /dev/null +++ b/runtime/neurun/frontend/tflite/tflite_schema.fbs @@ -0,0 +1,795 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// Version 0: Initial version. +// Version 1: Add subgraphs to schema. +// Version 2: Rename operators to conform to NN API. +// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers. + +// Change namespace to neurun_tflite +namespace neurun_tflite; + +// This corresponds to the version. +file_identifier "TFL3"; +// File extension of any written files. +file_extension "tflite"; + +// IMPORTANT: All new members of tables, enums and unions must be added at the +// end to ensure backwards compatibility. + +// The type of data stored in a tensor. +enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, + BOOL = 6, + INT16 = 7, + COMPLEX64 = 8, + INT8 = 9, +} + +// Custom quantization parameters for experimenting with new quantization +// techniques. +table CustomQuantization { + custom:[ubyte] (force_align: 16); +} + +// Represents a specific quantization technique's parameters. +union QuantizationDetails { + CustomQuantization, +} + +// Parameters for converting a quantized tensor back to float. +table QuantizationParameters { + // These four parameters are the asymmetric linear quantization parameters. + // Given a quantized value q, the corresponding float value f should be: + // f = scale * (q - zero_point) + // For other quantization types, the QuantizationDetails below is used. + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; // For dequantizing the tensor's values. 
+ zero_point:[long]; + + // If this is not none, the quantization parameters above are ignored and the + // value of the QuantizationDetails union below should be used. + details:QuantizationDetails; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, height, width, number of channels] (That's + // Tensorflow's NHWC). + shape:[int]; + type:TensorType; + // An index that refers to the buffers table at the root of the model. Or, + // if there is no data buffer associated (i.e. intermediate results), then + // this is 0 (which refers to an always existent empty buffer). + // + // The data_buffer itself is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k]. + buffer:uint; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. + + is_variable:bool = false; +} + +// A list of builtin operators. Builtin operators are slightly faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +enum BuiltinOperator : byte { + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + // DEPTH_TO_SPACE = 5, + DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + MUL = 18, + RELU = 19, + // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed + // since different model developers use RELU1 in different ways. 
Never + // create another op called RELU1. + RELU_N1_TO_1 = 20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + EMBEDDING_LOOKUP_SPARSE = 33, + PAD = 34, + UNIDIRECTIONAL_SEQUENCE_RNN = 35, + GATHER = 36, + BATCH_TO_SPACE_ND = 37, + SPACE_TO_BATCH_ND = 38, + TRANSPOSE = 39, + MEAN = 40, + SUB = 41, + DIV = 42, + SQUEEZE = 43, + UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + STRIDED_SLICE = 45, + BIDIRECTIONAL_SEQUENCE_RNN = 46, + EXP = 47, + TOPK_V2 = 48, + SPLIT = 49, + LOG_SOFTMAX = 50, + // DELEGATE is a special op type for the operations which are delegated to + // other backends. + // WARNING: Experimental interface, subject to change + DELEGATE = 51, + BIDIRECTIONAL_SEQUENCE_LSTM = 52, + CAST = 53, + PRELU = 54, + MAXIMUM = 55, + ARG_MAX = 56, + MINIMUM = 57, + LESS = 58, + NEG = 59, + PADV2 = 60, + GREATER = 61, + GREATER_EQUAL = 62, + LESS_EQUAL = 63, + SELECT = 64, + SLICE = 65, + SIN = 66, + TRANSPOSE_CONV = 67, + SPARSE_TO_DENSE = 68, + TILE = 69, + EXPAND_DIMS = 70, + EQUAL = 71, + NOT_EQUAL = 72, + LOG = 73, + SUM = 74, + SQRT = 75, + RSQRT = 76, + SHAPE = 77, + POW = 78, + ARG_MIN = 79, + FAKE_QUANT = 80, + REDUCE_PROD = 81, + REDUCE_MAX = 82, + PACK = 83, + LOGICAL_OR = 84, + ONE_HOT = 85, + LOGICAL_AND = 86, + LOGICAL_NOT = 87, + UNPACK = 88, + REDUCE_MIN = 89, + FLOOR_DIV = 90, + REDUCE_ANY = 91, + SQUARE = 92, + ZEROS_LIKE = 93, + FILL = 94, + FLOOR_MOD = 95, + RANGE = 96, + RESIZE_NEAREST_NEIGHBOR = 97, + LEAKY_RELU = 98, + SQUARED_DIFFERENCE = 99, + MIRROR_PAD = 100, + ABS = 101, + SPLIT_V = 102, +} + +// Options for the builtin operators. 
+union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, + EmbeddingLookupSparseOptions, + MulOptions, + PadOptions, + GatherOptions, + BatchToSpaceNDOptions, + SpaceToBatchNDOptions, + TransposeOptions, + ReducerOptions, + SubOptions, + DivOptions, + SqueezeOptions, + SequenceRNNOptions, + StridedSliceOptions, + ExpOptions, + TopKV2Options, + SplitOptions, + LogSoftmaxOptions, + CastOptions, + DequantizeOptions, + MaximumMinimumOptions, + ArgMaxOptions, + LessOptions, + NegOptions, + PadV2Options, + GreaterOptions, + GreaterEqualOptions, + LessEqualOptions, + SelectOptions, + SliceOptions, + TransposeConvOptions, + SparseToDenseOptions, + TileOptions, + ExpandDimsOptions, + EqualOptions, + NotEqualOptions, + ShapeOptions, + PowOptions, + ArgMinOptions, + FakeQuantOptions, + PackOptions, + LogicalOrOptions, + OneHotOptions, + LogicalAndOptions, + LogicalNotOptions, + UnpackOptions, + FloorDivOptions, + SquareOptions, + ZerosLikeOptions, + FillOptions, + BidirectionalSequenceLSTMOptions, + BidirectionalSequenceRNNOptions, + UnidirectionalSequenceLSTMOptions, + FloorModOptions, + RangeOptions, + ResizeNearestNeighborOptions, + LeakyReluOptions, + SquaredDifferenceOptions, + MirrorPadOptions, + AbsOptions, + SplitVOptions, +} + +enum Padding : byte { SAME, VALID } + +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU_N1_TO_1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table Pool2DOptions { + padding:Padding; 
+ stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + // Parameters for DepthwiseConv version 1 or above. + padding:Padding; + stride_w:int; + stride_h:int; + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; + // Parameters for DepthwiseConv version 2 or above. + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. +} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow dynamic_rnn with RNNCell. +table SequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow bidrectional_dynamic_rnn with RNNCell. +table BidirectionalSequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + merge_outputs: bool; +} + +enum FullyConnectedOptionsWeightsFormat: byte { + DEFAULT = 0, + SHUFFLED4x16INT8 = 1, +} + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + // Parameters for FullyConnected version 1 or above. + fused_activation_function:ActivationFunctionType; + + // Parameters for FullyConnected version 2 or above. + weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. 
+table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; +} + +table MulOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +enum LSTMKernelType : byte { + // Full LSTM kernel which supports peephole and projection. + FULL = 0, + // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell. + BASIC = 1, +} + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + // Parameters for LSTM version 1 or above. + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // Parameters for LSTM version 2 or above. + // Basic kernel is only supported in version 2 or above. + kernel_type: LSTMKernelType = FULL; +} + +// An implementation of TensorFlow dynamic_rnn with LSTMCell. +table UnidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true then first dimension is sequence, otherwise batch. + time_major:bool; +} + +table BidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true, store the outputs of both directions into the first output. 
+ merge_outputs: bool; +} + +table ResizeBilinearOptions { + new_height: int (deprecated); + new_width: int (deprecated); + align_corners: bool; +} + +table ResizeNearestNeighborOptions { + align_corners: bool; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:uint; +} + +table PadOptions { +} + +table PadV2Options { +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SpaceToBatchNDOptions { +} + +table BatchToSpaceNDOptions { +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +table SubOptions { + fused_activation_function:ActivationFunctionType; +} + +table DivOptions { + fused_activation_function:ActivationFunctionType; +} + +table TopKV2Options { +} + +enum CombinerType : byte { + SUM = 0, + MEAN = 1, + SQRTN = 2, +} + +table EmbeddingLookupSparseOptions { + combiner:CombinerType; +} + +table GatherOptions { + axis: int; +} + +table TransposeOptions { +} + +table ExpOptions { +} + +table ReducerOptions { + keep_dims: bool; +} + +table SqueezeOptions { + squeeze_dims:[int]; +} + +table SplitOptions { + num_splits: int; +} + +table SplitVOptions { + num_splits: int; +} + +table StridedSliceOptions { + begin_mask: int; + end_mask: int; + ellipsis_mask: int; + new_axis_mask: int; + shrink_axis_mask: int; +} + +table LogSoftmaxOptions { +} + +table CastOptions { + in_data_type: TensorType; + out_data_type: TensorType; +} + +table DequantizeOptions { +} + +table MaximumMinimumOptions { +} + +table TileOptions { +} + +table ArgMaxOptions { + output_type : TensorType; +} + +table ArgMinOptions { + output_type : TensorType; +} + +table GreaterOptions { +} + +table GreaterEqualOptions { +} + +table LessOptions { +} + +table LessEqualOptions { +} + +table NegOptions { +} + +table SelectOptions { +} + +table SliceOptions { +} + +table TransposeConvOptions { + padding:Padding; + stride_w:int; + 
stride_h:int; +} + +table ExpandDimsOptions { +} + +table SparseToDenseOptions { + validate_indices:bool; +} + +table EqualOptions { +} + +table NotEqualOptions { +} + +table ShapeOptions { + // Optional output type of the operation (int32 or int64). Defaults to int32. + out_type : TensorType; +} + +table PowOptions { +} + +table FakeQuantOptions { + // Parameters supported by version 1: + min:float; + max:float; + num_bits:int; + + // Parameters supported by version 2: + narrow_range:bool; +} + +table PackOptions { + values_count:int; + axis:int; +} + +table LogicalOrOptions { +} + +table OneHotOptions { + axis:int; +} + +table AbsOptions { +} + + +table LogicalAndOptions { +} + +table LogicalNotOptions { +} + +table UnpackOptions { + num:int; + axis:int; +} + +table FloorDivOptions { +} + +table SquareOptions { +} + +table ZerosLikeOptions { +} + +table FillOptions { +} + +table FloorModOptions { +} + +table RangeOptions { +} + +table LeakyReluOptions { + alpha:float; +} + +table SquaredDifferenceOptions { +} + +enum MirrorPadMode : byte { + // Doesn't include borders. + REFLECT = 0, + // Includes borders. + SYMMETRIC = 1, +} + +table MirrorPadOptions { + mode:MirrorPadMode; +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + builtin_code:BuiltinOperator; + custom_code:string; + + // The version of the operator. The version need to be bumped whenever new + // parameters are introduced into an op. + version:int = 1; +} + +enum CustomOptionsFormat : byte { + FLEXBUFFERS = 0, +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operations is configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicate map lookups. 
+ opcode_index:uint; + + // Optional input and output tensors are indicated by -1. + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; + custom_options_format:CustomOptionsFormat; + + // A list of booleans indicating the input tensors which are being mutated by + // this operator.(e.g. used by RNN and LSTM). + // For example, if the "inputs" array refers to 5 tensors and the second and + // fifth are mutable variables, then this list will contain + // [false, true, false, false, true]. + // + // If the list is empty, no variable is mutated in this operator. + // The list either has the same length as `inputs`, or is empty. + mutating_variable_inputs:[bool]; +} + +// The root type, defining a subgraph, which typically represents an entire +// model. +table SubGraph { + // A list of all tensors used in this subgraph. + tensors:[Tensor]; + + // Indices of the tensors that are inputs into this subgraph. Note this is + // the list of non-static tensors that feed into the subgraph for inference. + inputs:[int]; + + // Indices of the tensors that are outputs out of this subgraph. Note this is + // the list of output tensors that are considered the product of the + // subgraph's inference. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of this subgraph (used for debugging). + name:string; +} + +// Table of raw data buffers (used for constant tensors). Referenced by tensors +// by index. The generous alignment accommodates mmap-friendly data structures. +table Buffer { + data:[ubyte] (force_align: 16); +} + +table Model { + // Version of the schema. + version:uint; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. 
+ description:string; + + // Buffers of the model. + // Note the 0th entry of this array must be an empty buffer (sentinel). + // This is a convention so that tensors without a buffer can provide 0 as + // their buffer. + buffers:[Buffer]; + + // Metadata about the model. Indirects into the existings buffers list. + metadata_buffer:[int]; +} + +root_type Model; diff --git a/runtime/neurun/sample/CMakeLists.txt b/runtime/neurun/sample/CMakeLists.txt new file mode 100644 index 000000000..d853ba634 --- /dev/null +++ b/runtime/neurun/sample/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(minimal) diff --git a/runtime/neurun/sample/minimal/CMakeLists.txt b/runtime/neurun/sample/minimal/CMakeLists.txt new file mode 100644 index 000000000..6f4b02761 --- /dev/null +++ b/runtime/neurun/sample/minimal/CMakeLists.txt @@ -0,0 +1,10 @@ +if(NOT BUILD_MINIMAL_SAMPLE) + return() +endif(NOT BUILD_MINIMAL_SAMPLE) + +list(APPEND MINIMAL_SRCS "src/minimal.cc") + +add_executable(minimal ${MINIMAL_SRCS}) +target_link_libraries(minimal nnfw-dev pthread dl) + +install(TARGETS minimal DESTINATION bin) diff --git a/runtime/neurun/sample/minimal/README.md b/runtime/neurun/sample/minimal/README.md new file mode 100644 index 000000000..fecad6fb2 --- /dev/null +++ b/runtime/neurun/sample/minimal/README.md @@ -0,0 +1,13 @@ +# minimal + +`minimal` is a simple driver to run `nnpackage` with nnfw API. + +It takes `nnpackage` as input. It uses **nnfwAPI** internally. + +It assumes model of float32 tensor type as an input. + +## Usage + +``` +$ ./minimal path_to_nnpackage_directory +``` diff --git a/runtime/neurun/sample/minimal/src/minimal.cc b/runtime/neurun/sample/minimal/src/minimal.cc new file mode 100644 index 000000000..003c8a323 --- /dev/null +++ b/runtime/neurun/sample/minimal/src/minimal.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnfw.h" +#include <vector> + +uint64_t num_elems(const nnfw_tensorinfo *ti) +{ + uint64_t n = 1; + for (uint32_t i = 0; i < ti->rank; ++i) + { + n *= ti->dims[i]; + } + return n; +} + +int main(const int argc, char **argv) +{ + nnfw_session *session = nullptr; + nnfw_create_session(&session); + + // Loading nnpackage + nnfw_load_model_from_file(session, argv[1]); + + // Use acl_neon backend for CONV_2D and acl_cl for otherwise. + // Note that defalut backend is acl_cl + nnfw_set_op_backend(session, "CONV_2D", "acl_neon"); + + // Compile model + nnfw_prepare(session); + + // Prepare input. Here we just allocate dummy input arrays. + std::vector<float> input; + nnfw_tensorinfo ti; + nnfw_input_tensorinfo(session, 0, &ti); // get first input's info + uint32_t input_elements = num_elems(&ti); + input.resize(input_elements); + // TODO: Please add initialization for your input. + nnfw_set_input(session, 0, ti.dtype, input.data(), sizeof(float) * input_elements); + + // Prepare output + std::vector<float> output; + nnfw_output_tensorinfo(session, 0, &ti); // get first output's info + uint32_t output_elements = num_elems(&ti); + output.resize(output_elements); + nnfw_set_output(session, 0, ti.dtype, output.data(), sizeof(float) * output_elements); + + // Do inference + nnfw_run(session); + + // TODO: Please print or compare the output value in your way. 
+ + return 0; +} diff --git a/runtime/neurun/test/CMakeLists.txt b/runtime/neurun/test/CMakeLists.txt new file mode 100644 index 000000000..815173937 --- /dev/null +++ b/runtime/neurun/test/CMakeLists.txt @@ -0,0 +1,15 @@ +set(TEST_NEURUN test_neurun) + +file(GLOB_RECURSE TESTS "*.cc") + +add_executable(${TEST_NEURUN} ${TESTS}) + +target_include_directories(${TEST_NEURUN} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src) + +target_link_libraries(${TEST_NEURUN} neurun_core) +target_link_libraries(${TEST_NEURUN} gtest) +target_link_libraries(${TEST_NEURUN} gtest_main) +target_link_libraries(${TEST_NEURUN} ${LIB_PTHREAD} dl) +add_test(${TEST_NEURUN} ${TEST_NEURUN}) + +install(TARGETS ${TEST_NEURUN} DESTINATION unittest) diff --git a/runtime/neurun/test/core/backend/ExecTime.test.cc b/runtime/neurun/test/core/backend/ExecTime.test.cc new file mode 100644 index 000000000..4b89e64d2 --- /dev/null +++ b/runtime/neurun/test/core/backend/ExecTime.test.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/ExecTime.h" +#include "backend/IConfig.h" +#include "backend/Backend.h" +#include <gtest/gtest.h> +#include <string> + +namespace +{ +using namespace neurun; +using namespace backend; + +struct MockConfig : public IConfig +{ + std::string id() override { return "b1"; } + bool initialize() override { return true; }; + bool SupportPermutation() override { return false; } + bool SupportSubTensorAlloc() override { return false; } +}; + +struct MockBackend : public ::neurun::backend::Backend +{ + std::shared_ptr<neurun::backend::IConfig> config() const override + { + return std::make_shared<MockConfig>(); + } + std::unique_ptr<BackendContext> + newContext(const ir::Operands &, const std::shared_ptr<custom::IKernelBuilder> &kb) const override + { + return nullptr; + } +}; + +TEST(ExecTime, roundtrip_ok) +{ + const auto *b = new MockBackend(); + std::vector<const Backend *> bs = {b}; + { + ExecTime et(bs); + et.updateOperationExecTime(b, "op1", true, 100, 100); + et.updateOperationExecTime(b, "op1", true, 200, 200); + et.updateOperationExecTime(b, "op1", false, 100, 888); + et.uploadOperationsExecTime(); + } + { + ExecTime et(bs); + auto time = et.getOperationExecTime(b, "op1", true, 100); + ASSERT_EQ(time, 100); + // Check interpolation + time = et.getOperationExecTime(b, "op1", true, 150); + ASSERT_EQ(time, 150); + time = et.getOperationExecTime(b, "op1", false, 100); + ASSERT_EQ(time, 888); + et.uploadOperationsExecTime(); + } + // clean up + EXPECT_EQ(remove("exec_time.json"), 0); +} + +TEST(ExecTime, structure) +{ + + const auto *b = new MockBackend(); + std::vector<const Backend *> bs = {b}; + { + ExecTime et(bs); + et.updateOperationExecTime(b, "op1", true, 100, 100); + et.updateOperationExecTime(b, "op1", true, 200, 200); + et.uploadOperationsExecTime(); + } + { + ExecTime et(bs); + auto time = et.getOperationExecTime(b, "op1", true, 100); + ASSERT_EQ(time, 100); + // Check interpolation + time = et.getOperationExecTime(b, "op1", true, 
200); + ASSERT_EQ(time, 200); + et.uploadOperationsExecTime(); + } + // clean up + EXPECT_EQ(remove("exec_time.json"), 0); +} +} // unnamed namespace diff --git a/runtime/neurun/test/core/compiler/Scheduler.cc b/runtime/neurun/test/core/compiler/Scheduler.cc new file mode 100644 index 000000000..a4fbfeb2c --- /dev/null +++ b/runtime/neurun/test/core/compiler/Scheduler.cc @@ -0,0 +1,550 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <compiler/HEScheduler.h> +#include <backend/ExecTime.h> +#include <backend/IShapeFixer.h> + +#include <ir/Shape.h> +#include <ir/InternalType.h> +#include <ir/TypeInfo.h> +#include <ir/DataType.h> + +#include <ir/operation/Add.h> +#include <ir/operation/Sub.h> +#include <ir/operation/Mul.h> +#include <ir/operation/FullyConnected.h> + +#include <gtest/gtest.h> + +namespace +{ +using namespace neurun; +using namespace ir; +using namespace backend; +using namespace operation; + +// +// Mock backends classes +// + +// Backend could be created without ShapeFixer. +// But it is used by scheduler to detect which operations are supported by backend. 
+struct MockShapeFixer : IShapeFixer +{ + void visit(const Add &) override {} + void visit(const Sub &) override {} + void visit(const Mul &) override {} + void visit(const FullyConnected &) override {} +}; + +struct MockConfigCPU : public IConfig +{ + std::string id() override { return "cpu"; } + bool initialize() override { return true; }; + bool SupportPermutation() override { return false; } + bool SupportSubTensorAlloc() override { return false; } +}; + +struct MockBackendCPU : public Backend +{ + std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); } + std::unique_ptr<BackendContext> + newContext(const Operands &, const std::shared_ptr<custom::IKernelBuilder> &) const override + { + return std::unique_ptr<BackendContext>( + new BackendContext{this, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()}); + } +}; + +struct MockConfigGPU : public IConfig +{ + std::string id() override { return "gpu"; } + bool initialize() override { return true; }; + bool SupportPermutation() override { return false; } + bool SupportSubTensorAlloc() override { return false; } +}; + +struct MockBackendGPU : public Backend +{ + std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); } + std::unique_ptr<BackendContext> + newContext(const Operands &, const std::shared_ptr<custom::IKernelBuilder> &) const override + { + return std::unique_ptr<BackendContext>( + new BackendContext{this, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()}); + } +}; + +struct MockConfigNPU : public IConfig +{ + std::string id() override { return "npu"; } + bool initialize() override { return true; }; + bool SupportPermutation() override { return false; } + bool SupportSubTensorAlloc() override { return false; } +}; + +struct MockBackendNPU : public Backend +{ + std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); } + std::unique_ptr<BackendContext> + newContext(const 
Operands &, const std::shared_ptr<custom::IKernelBuilder> &) const override + { + return std::unique_ptr<BackendContext>( + new BackendContext{this, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()}); + } +}; + +// +// Constants +// + +const int OPERAND_ELEMS = 268203; +const int OPERAND_SIZE = OPERAND_ELEMS * 4; +const int OPERATION_SIZE = OPERAND_SIZE * 3; + +const std::string LINEAR("Linear"); +const std::string DATAFLOW("Dataflow"); +const std::string PARALLEL("Parallel"); + +// +// Helper functions +// + +// Set executor through environment variable +void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); } + +// Set profiling mode through environment variable +void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); } + +// Calculate operation size by addition sizes of all input and output operands +uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx) +{ + uint32_t size = 0; + for (const auto &input : graph->operations().at(op_idx).getInputs()) + size += graph->operands().at(input).info().total_size(); + for (const auto &output : graph->operations().at(op_idx).getOutputs()) + size += graph->operands().at(output).info().total_size(); + return size; +} + +// Set execution operation time. This method is needed since ExecutionTime has only +// 'updateOperationExecTime' method. +void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation, + bool quant, uint32_t op_size, int64_t time) +{ + // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it + assert(time > 0); + int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size); + int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? 
time : 2 * time - prev_time; + et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set); + assert(et.getOperationExecTime(backend, operation, quant, op_size) == time); +} + +// Set same execution time for all given backends/operations +void setOperationsExecutionTime(const std::vector<const Backend *> &backends, + const std::vector<std::string> &op_names, + const std::vector<uint32_t> &op_sizes, int64_t exec_time) +{ + assert(op_names.size() == op_sizes.size()); + ExecTime et(backends); + for (int i = 0; i < op_names.size(); ++i) + { + for (auto &backend : backends) + setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time); + } + et.uploadOperationsExecTime(); +} + +// Set permute time from one backend to another. This method is needed since ExecutionTime has only +// 'updatePermuteTime' method. +void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend, + bool quant, uint32_t op_size, int64_t time) +{ + // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it + assert(time > 0); + int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size); + int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? 
time : 2 * time - prev_time; + et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set); + assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time); +} + +// Set same permutation time between all given backends +void setPermutationsExecutionTime(const std::vector<const Backend *> &backends, + const int operand_size, const int64_t exec_time) +{ + ExecTime et(backends); + for (const auto &backend : backends) + { + for (auto &other_backend : backends) + { + if (backend == other_backend) + continue; + setPermutationTime(et, backend, other_backend, false, operand_size, exec_time); + } + } + et.uploadOperationsExecTime(); +} + +// +// Functions for creating graphs +// + +using OIS = OperandIndexSequence; + +template <typename NodeT, typename... Types> +OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args) +{ + typename NodeT::Param op_params{Activation::NONE}; + auto op = nnfw::cpp14::make_unique<NodeT>(std::forward<Types>(args)..., op_params); + auto op_idx = graph->addOperation(std::move(op)); + // For now in scheduler test all operations in tested graphs has same size (for simplicity) + assert(calcOpSize(graph, op_idx) == OPERATION_SIZE); + return op_idx; +} + +// Create straight graph: Add->Sub->Mul +std::shared_ptr<Graph> createStraightGraph() +{ + auto graph = std::make_shared<Graph>(); + const TypeInfo float_op(DataType::FLOAT32); + + // Create add node + auto add_lhs_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto add_rhs_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto add_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}); + + // Create sub node + auto sub_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto sub_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}); + + // Create mul node + auto 
mul_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto mul_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}); + + graph->finishBuilding(); + return graph; +} + +/* Create branched graph: + * [Add] + * // \\ + * [Mul1] [FC2] + * || || + * [Mul2] [FC2] + * \\ // + * [Sub] + */ +std::shared_ptr<Graph> createBranchedGraph() +{ + auto graph = std::make_shared<Graph>(); + const TypeInfo float_op(DataType::FLOAT32); + + // Create add node + auto add_lhs_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto add_rhs_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto add_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}); + + // Create mul1 node + auto mul1_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto mul1_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx}); + + // Create mul2 node + auto mul2_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto mul2_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx}); + + // Create fc1 node + auto fc1_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto fc1_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}); + + // Create fc2 node + auto fc2_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + auto fc2_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}); + + // Create add2 node + auto sub_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op); + create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}); + + 
graph->finishBuilding(); + return graph; +} + +// +// Tests setup/teardown +// + +// SetUp/TearDown methods runs before/after each test and performs actions common for each test +class SchedulerTest : public ::testing::Test +{ +protected: + void SetUp() override + { + // Initialize mock backends + _cpu_backend = new MockBackendCPU(); + _gpu_backend = new MockBackendGPU(); + _npu_backend = new MockBackendNPU(); + _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend}; + + // Remove previous profile data if it exists + if (!remove("exec_time.json")) + { + // DO NOTHING (no profile data) + } + + // Remember original value of 'EXECUTOR' environment variable + char *executor = std::getenv("EXECUTOR"); + _original_executor = executor == nullptr ? "" : executor; + + // Remember original value of 'PROFILING_MODE' environment variable + char *profiling_mode = std::getenv("PROFILING_MODE"); + _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode; + } + + void TearDown() override + { + delete _cpu_backend; + delete _gpu_backend; + delete _npu_backend; + EXPECT_EQ(remove("exec_time.json"), 0); + setenv("EXECUTOR", _original_executor.c_str(), true); + setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true); + } + + const MockBackendCPU *_cpu_backend{nullptr}; + const MockBackendGPU *_gpu_backend{nullptr}; + const MockBackendNPU *_npu_backend{nullptr}; + std::vector<const Backend *> _mock_backends; + + std::string _original_executor; + std::string _original_profiling_mode; +}; + +class SchedulerTestWithExecutorParam : public SchedulerTest, + public testing::WithParamInterface<std::string> +{ +}; + +// +// HEScheduler tests +// + +// Test scheduler behavior for straight graph with known execution time of all nodes and permutes. 
+TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) +{ + setExecutor(GetParam()); + + // Prepare graph + auto graph(createStraightGraph()); + OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2); + + // Set default execution and transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1); + setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"}, + {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4); + + // Test 1 + // Expected behaviour: scheduler assigns different backend to each node + { + // For each backend reduce execution time of one node + ExecTime et(_mock_backends); + setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1); + setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1); + setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1); + et.uploadOperationsExecTime(); + + // Test scheduler + auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu"); + ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu"); + } + + // Test 2 + // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time + { + // Increase transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5); + + // Test scheduler + auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu"); + } +} + +// Test scheduler behavior for branched graph with known execution time of all nodes and permutes +TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) +{ + const 
int64_t NPU_ET = 5000; + setExecutor(GetParam()); + + // Prepare graph + auto graph(createBranchedGraph()); + OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), + sub_op_idx(5); + + // Set default execution and transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000); + setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"}, + {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4); + + // Test 1 + // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all + // nodes, in case of parallel executor scheduler assigns different backends to branches. + { + // Reduce execution time + ExecTime et(_mock_backends); + setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000); + setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000); + et.uploadOperationsExecTime(); + + // Test scheduler + auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr); + const auto br = scheduler.schedule(*graph); + + std::string branch1_expected_backend("npu"), branch2_expected_backend("npu"); + if (GetParam() == PARALLEL) + { + branch1_expected_backend = + br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; + branch2_expected_backend = branch1_expected_backend == "npu" ? 
"gpu" : "npu"; + } + + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend); + ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend); + ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend); + ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu"); + } + + // Test 2 + // Expected behaviour: scheduler assigns single backend to all nodes + { + // Increase execution time for GPU backend + ExecTime et(_mock_backends); + /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt * + * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the + * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter + * branching or scheduler assigns another backend to a node*/ + setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1); + setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1); + et.uploadOperationsExecTime(); + + // Test scheduler + auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu"); + } +} + +// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - +// one time for each executor +INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam, + testing::Values(LINEAR, 
DATAFLOW, PARALLEL)); + +// Test scheduler behavior for branched graph and enabled profiling mode +TEST_F(SchedulerTest, branched_graph_profiling_mode) +{ + const int ET = 1e5; + + // Turn on profiling mode + setProfilingMode(true); + setExecutor(DATAFLOW); + + // Prepare graph + auto graph(createBranchedGraph()); + OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), + sub_op_idx(5); + + // Test 1 + // Expected behaviour: scheduler assigns backends to nodes with unknown execution time + { + // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC + ExecTime et(_mock_backends); + setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); + setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); + setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET); + et.uploadOperationsExecTime(); + + // Test scheduler + auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu"); + ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu"); + } + + // Test 2 + // Expected behaviour: scheduler shuffling backends, so different backends are assigned to + // neighbor 
nodes + { + // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC) + ExecTime et(_mock_backends); + setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1); + setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); + et.uploadOperationsExecTime(); + + // Test scheduler + auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr); + const auto br = scheduler.schedule(*graph); + ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), + br->getBackend(mul1_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), + br->getBackend(fc1_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(), + br->getBackend(mul2_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(), + br->getBackend(fc2_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(), + br->getBackend(sub_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(), + br->getBackend(sub_op_idx)->config()->id()); + } +} + +// TODO: Add tests with unknown execution and permutation time + +} // unnamed namespace diff --git a/runtime/neurun/test/core/exec/ExecInstance.cc b/runtime/neurun/test/core/exec/ExecInstance.cc new file mode 100644 index 000000000..49d561226 --- /dev/null +++ b/runtime/neurun/test/core/exec/ExecInstance.cc @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> +#include <thread> + +#include "ir/Graph.h" +#include "compiler/Compiler.h" +#include "exec/Execution.h" +#include "ir/operation/Add.h" + +namespace +{ + +using namespace neurun::ir; + +class CompiledMockUpModel +{ +public: + CompiledMockUpModel() + { + // Model: two elementwise add operation + // model input: lhs, rhs1 + // model output: second add result (result2) + // constant: rhs2 + // result1 <= (lhs + rhs) + // result2 <= (result1 + rhs2) + // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1} + // activation: none (constant) + graph = std::make_shared<Graph>(); + // 1st add operands (result1 <= lhs + rhs1) + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + static float rhs2_data[4] = {3, 1, -1, 5}; + auto operand_lhs = graph->addOperand(shape, type); + auto operand_rhs1 = graph->addOperand(shape, type); + auto operand_result1 = graph->addOperand(shape, type); + auto operand_rhs2 = graph->addOperand(shape, type); + auto operand_result2 = graph->addOperand(shape, type); + graph->operands() + .at(operand_rhs2) + .data(nnfw::cpp14::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), + 16)); + // 2nd add operations (result2 <= result1 + rhs2) + operation::Add::Param param1; + param1.activation = Activation::NONE; + auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; + auto output_set1 = OperandIndexSequence{operand_result1}; + graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set1, output_set1, param1)); + operation::Add::Param param2; + 
param2.activation = Activation::NONE; + auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; + auto output_set2 = OperandIndexSequence{operand_result2}; + graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set2, output_set2, param2)); + // Identify model inputs and outputs + graph->addInput(operand_lhs); + graph->addInput(operand_rhs1); + graph->addOutput(operand_result2); + graph->finishBuilding(); + + // Compile + auto compiler = new neurun::compiler::Compiler{graph}; + compiler->compile(); + compiler->release(executor); + delete compiler; + } + +public: + std::shared_ptr<Graph> graph; + std::shared_ptr<neurun::exec::IExecutor> executor; +}; + +TEST(ExecInstance, simple) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executor = mockup.executor; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float input1_buffer[4] = {1, 0, -1, -2}; + const float input2_buffer[4] = {1, -3, 2, -4}; + float output_buffer[4] = {}; + const float output_expected[4] = {5, -2, 0, -1}; + + auto execution = new neurun::exec::Execution(executor); + + execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16); + execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16); + execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16); + execution->execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } + + delete execution; +} + +TEST(ExecInstance, twoCompile) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executor1 = mockup.executor; + auto execution1 = new neurun::exec::Execution(executor1); + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float 
exe1_output_expected[4] = {5, -2, 0, -1}; + + execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16); + execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16); + execution1->setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16); + + // Make new executor: compile again + auto compiler = new neurun::compiler::Compiler{graph}; + compiler->compile(); + std::shared_ptr<neurun::exec::IExecutor> executor2; + compiler->release(executor2); + auto execution2 = new neurun::exec::Execution(executor2); + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16); + execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16); + execution2->setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16); + + execution1->execute(); + execution2->execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } + + delete compiler; + delete execution1; + delete execution2; +} + +// Support two initialized execution instance then ordered execution +TEST(ExecInstance, twoExecution) +{ + auto mockup = CompiledMockUpModel(); + auto executor = mockup.executor; + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output1 = IOIndex{0}; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {5, -2, 0, -1}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + auto execution1 = new neurun::exec::Execution(executor); + execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16); + 
execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16); + execution1->setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16); + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + + // Make new execution + auto execution2 = new neurun::exec::Execution(executor); + execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16); + execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16); + execution2->setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16); + + execution1->execute(); + execution2->execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } + + delete execution1; + delete execution2; +} + +class Inference +{ +public: + Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4], + std::shared_ptr<neurun::exec::IExecutor> &executor) + : _input1{input1}, _input2{input2}, _output{output}, _executor{executor} + { + // DO NOTHING + } + + void inference(void) + { + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output1 = IOIndex{0}; + + auto execution = new neurun::exec::Execution(_executor); + execution->setInput(input1, reinterpret_cast<const void *>(_input1), 16); + execution->setInput(input2, reinterpret_cast<const void *>(_input2), 16); + execution->setOutput(output1, reinterpret_cast<void *>(_output), 16); + + execution->execute(); + + delete execution; + } + +private: + const float (&_input1)[4]; + const float (&_input2)[4]; + float (&_output)[4]; + std::shared_ptr<neurun::exec::IExecutor> &_executor; +}; + +// Support multi-thread execution +TEST(ExecInstance, twoThreads) +{ + auto mockup = CompiledMockUpModel(); + auto executor = mockup.executor; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + 
const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {5, -2, 0, -1}; + + Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executor}; + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executor}; + + std::thread t1{&Inference::inference, &execution1}; + std::thread t2{&Inference::inference, &execution2}; + + t1.join(); + t2.join(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +// Support asynchronous execution +TEST(ExecInstance, async) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executor = mockup.executor; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float input1_buffer[4] = {1, 0, -1, -2}; + const float input2_buffer[4] = {1, -3, 2, -4}; + float output_buffer[4] = {}; + const float output_expected[4] = {5, -2, 0, -1}; + + auto execution = new neurun::exec::Execution(executor); + + execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16); + execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16); + execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16); + execution->startExecute(); + execution->waitFinish(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } + + delete execution; +} + +} // namespace diff --git a/runtime/neurun/test/core/exec/interp/ExecManager.cc b/runtime/neurun/test/core/exec/interp/ExecManager.cc new file mode 100644 index 000000000..aba64e28f --- /dev/null +++ 
b/runtime/neurun/test/core/exec/interp/ExecManager.cc @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> + +#include <cpp14/memory.h> + +#include "ir/Graph.h" +#include "exec/interp/ExecManager.h" +#include "exec/Execution.h" +#include "ir/operation/Add.h" + +namespace +{ + +using namespace neurun::ir; +using ExecManager = neurun::exec::interp::ExecManager; +using Execution = neurun::exec::Execution; + +class InterpExecManagerTest : public ::testing::Test +{ +protected: + virtual void SetUp() {} + void CreateSimpleModel() + { + // Model: one elementwise add operation + // model input: lhs, rhs + // model output: add result + // lhs, rhs, result shape: {1, 2, 2, 1} + // activation: none (constant) + _graph = nnfw::cpp14::make_unique<Graph>(); + + // Add operands + + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::INT32}; + Shape shape_scalar(0); + TypeInfo type_scalar{DataType::INT32}; + + auto operand_lhs = _graph->addOperand(shape, type); + auto operand_rhs = _graph->addOperand(shape, type); + auto operand_result = _graph->addOperand(shape, type); + + // Add operations + + operation::Add::Param param; + param.activation = Activation::NONE; + auto input_set = OperandIndexSequence{operand_lhs, operand_rhs}; + auto output_set = OperandIndexSequence{operand_result}; + 
_graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set, output_set, param)); + + // Identify model inputs and outputs + + _graph->getInputs().append(operand_lhs); + _graph->getInputs().append(operand_rhs); + _graph->getOutputs().append(operand_result); + + _graph->finishBuilding(); + + _executor = nnfw::cpp14::make_unique<ExecManager>(*_graph); + } + + void CreateTwoStepModel() + { + // Model: two elementwise add operation + // model input: lhs, rhs1 + // model output: second add result (result2) + // constant: rhs2 + // result1 <= (lhs + rhs) + // result2 <= (result1 + rhs2) + // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1} + // activation: none (constant) + _graph = nnfw::cpp14::make_unique<Graph>(); + + // 1st add operands (result1 <= lhs + rhs1) + + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::INT32}; + Shape shape_scalar(0); + TypeInfo type_scalar{DataType::INT32}; + + static int32_t rhs2_data[4] = {3, 1, -1, 5}; + + auto operand_lhs = _graph->addOperand(shape, type); + auto operand_rhs1 = _graph->addOperand(shape, type); + auto operand_result1 = _graph->addOperand(shape, type); + auto operand_rhs2 = _graph->addOperand(shape, type); + auto operand_result2 = _graph->addOperand(shape, type); + _graph->operands() + .at(operand_rhs2) + .data(nnfw::cpp14::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), + 16)); + + // 2nd add operations (result2 <= result1 + rhs2) + + operation::Add::Param param1; + param1.activation = Activation::NONE; + auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; + auto output_set1 = OperandIndexSequence{operand_result1}; + _graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set1, output_set1, param1)); + + operation::Add::Param param2; + param2.activation = Activation::NONE; + auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; + auto output_set2 = OperandIndexSequence{operand_result2}; + 
_graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set2, output_set2, param2)); + + // Identify model inputs and outputs + + _graph->getInputs().append(operand_lhs); + _graph->getInputs().append(operand_rhs1); + _graph->getOutputs().append(operand_result2); + + _graph->finishBuilding(); + + _executor = nnfw::cpp14::make_unique<ExecManager>(*_graph); + } + + void CreateUnspecifiedDimensionsModel() + { + // Model: one elementwise add operation + // model input: lhs, rhs + // model output: add result + // lhs, rhs, result shape: {1, unknown, 2, 1} + // activation: none (constant) + _graph = nnfw::cpp14::make_unique<Graph>(); + + // Add operands + + Shape shape{1, 0, 2, 1}; + TypeInfo type{DataType::INT32}; + Shape shape_scalar(0); + TypeInfo type_scalar{DataType::INT32}; + + auto operand_lhs = _graph->addOperand(shape, type); + auto operand_rhs = _graph->addOperand(shape, type); + + auto operand_activation = _graph->addOperand(shape_scalar, type_scalar); + _graph->operands() + .at(operand_activation) + .data(nnfw::cpp14::make_unique<CachedData>( + reinterpret_cast<const uint8_t *>(&_activation_value), 4)); + + auto operand_result = _graph->addOperand(shape, type); + + // Add operations + + operation::Add::Param param; + param.activation = Activation::NONE; + auto input_set = OperandIndexSequence{operand_lhs, operand_rhs}; + auto output_set = OperandIndexSequence{operand_result}; + _graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set, output_set, param)); + + // Identify model inputs and outputs + + _graph->getInputs().append(operand_lhs); + _graph->getInputs().append(operand_rhs); + _graph->getOutputs().append(operand_result); + + _graph->finishBuilding(); + + _executor = nnfw::cpp14::make_unique<ExecManager>(*_graph); + } + + void createExecution() { _execution = nnfw::cpp14::make_unique<Execution>(_executor); } + + virtual void TearDown() { _executor = nullptr; } + + std::unique_ptr<Graph> _graph{nullptr}; + 
std::shared_ptr<ExecManager> _executor{nullptr}; + std::unique_ptr<Execution> _execution{nullptr}; + const int32_t _activation_value{0}; +}; + +TEST_F(InterpExecManagerTest, create_empty) +{ + Graph graph; + graph.finishBuilding(); + _executor = nnfw::cpp14::make_unique<ExecManager>(graph); + ASSERT_NE(_executor, nullptr); +} + +TEST_F(InterpExecManagerTest, create_simple) +{ + CreateSimpleModel(); + ASSERT_NE(_executor, nullptr); +} + +TEST_F(InterpExecManagerTest, setInput) +{ + CreateSimpleModel(); + createExecution(); + + auto input1 = IOIndex{0}; + const int32_t input1_buffer[4] = {1, 0, -1, -2}; + + EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4), + std::runtime_error); + EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12), + std::runtime_error); + EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16)); +} + +TEST_F(InterpExecManagerTest, setOutput) +{ + CreateSimpleModel(); + createExecution(); + + auto output = IOIndex{0}; + auto output_idx = _graph->getOutputs().at(output); + + int32_t output_buffer[4] = {}; + + EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4), + std::runtime_error); + EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12), + std::runtime_error); + EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16)); +} + +TEST_F(InterpExecManagerTest, setInputForUnspecifiedDimensions) +{ + CreateUnspecifiedDimensionsModel(); + createExecution(); + + auto input1 = IOIndex{0}; + const int32_t input1_buffer[4] = {1, 0, -1, -2}; + + TypeInfo operand_type{DataType::INT32}; + Shape operand_shape{1, 2, 2, 1}; + + EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape, + reinterpret_cast<const void *>(input1_buffer), 4), + std::runtime_error); + EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape, 
+ reinterpret_cast<const void *>(input1_buffer), 12), + std::runtime_error); + EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape, + reinterpret_cast<const void *>(input1_buffer), 16)); +} + +TEST_F(InterpExecManagerTest, setOutputForUnspecifiedDimensions) +{ + CreateUnspecifiedDimensionsModel(); + createExecution(); + + auto output = IOIndex{0}; + auto output_idx = _graph->getOutputs().at(output); + + TypeInfo operand_type{DataType::INT32}; + Shape operand_shape{1, 2, 2, 1}; + + int32_t output_buffer[4] = {}; + + EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape, + reinterpret_cast<void *>(output_buffer), 4), + std::runtime_error); + EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape, + reinterpret_cast<void *>(output_buffer), 12), + std::runtime_error); + EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape, + reinterpret_cast<void *>(output_buffer), 16)); +} + +TEST_F(InterpExecManagerTest, execute) +{ + CreateSimpleModel(); + createExecution(); + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto input1_idx = _graph->getInputs().at(input1); + auto input2_idx = _graph->getInputs().at(input2); + + const int32_t input1_buffer[4] = {1, 0, -1, -2}; + const int32_t input2_buffer[4] = {1, -3, 2, -4}; + + auto output = IOIndex{0}; + auto output_idx = _graph->getOutputs().at(output); + + int32_t output_buffer[4] = {}; + + EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16)); + EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16)); + EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16)); + EXPECT_NO_THROW(_execution->execute()); + EXPECT_EQ(output_buffer[0], 2); + EXPECT_EQ(output_buffer[1], -3); + EXPECT_EQ(output_buffer[2], 1); + EXPECT_EQ(output_buffer[3], -6); +} + +TEST_F(InterpExecManagerTest, executeTwoStep) +{ + CreateTwoStepModel(); + 
createExecution(); + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto input1_idx = _graph->getInputs().at(input1); + auto input2_idx = _graph->getInputs().at(input2); + + const int32_t input1_buffer[4] = {1, 0, -1, -2}; + const int32_t input2_buffer[4] = {1, -3, 2, -4}; + + auto output = IOIndex{0}; + auto output_idx = _graph->getOutputs().at(output); + + int32_t output_buffer[4] = {}; + + EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16)); + EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16)); + EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16)); + EXPECT_NO_THROW(_execution->execute()); + EXPECT_EQ(output_buffer[0], 5); + EXPECT_EQ(output_buffer[1], -2); + EXPECT_EQ(output_buffer[2], 0); + EXPECT_EQ(output_buffer[3], -1); +} + +} // namespace diff --git a/runtime/neurun/test/graph/Graph.cc b/runtime/neurun/test/graph/Graph.cc new file mode 100644 index 000000000..faf84df9c --- /dev/null +++ b/runtime/neurun/test/graph/Graph.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include "ir/Graph.h" + +TEST(Graph, inputs_and_outputs) +{ + neurun::ir::Graph graph; + + neurun::ir::OperandIndex index0{0u}; + neurun::ir::OperandIndex index1{1u}; + + graph.addInput({index0}); + graph.addInput({index1}); + + neurun::ir::OperandIndex index10{10u}; + neurun::ir::OperandIndex index11{11u}; + neurun::ir::OperandIndex index12{12u}; + + graph.addOutput({index10}); + graph.addOutput({index11}); + graph.addOutput({index12}); + + ASSERT_EQ(graph.getInputs().size(), 2); + ASSERT_EQ(graph.getOutputs().size(), 3); + + neurun::ir::IOIndex io_index0{0}; + neurun::ir::IOIndex io_index1{1}; + neurun::ir::IOIndex io_index2{2}; + + ASSERT_EQ(graph.getInputs().at(io_index0), 0); + ASSERT_EQ(graph.getInputs().at(io_index1), 1); + + ASSERT_EQ(graph.getOutputs().at(io_index0), 10); + ASSERT_EQ(graph.getOutputs().at(io_index1), 11); + ASSERT_EQ(graph.getOutputs().at(io_index2), 12); +} diff --git a/runtime/neurun/test/graph/Index.cc b/runtime/neurun/test/graph/Index.cc new file mode 100644 index 000000000..c24c2dd5e --- /dev/null +++ b/runtime/neurun/test/graph/Index.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include "util/Index.h" + +using Index = ::neurun::util::Index<uint32_t, struct TestTag>; + +TEST(Index, index_test) +{ + Index idx1{1u}; + Index idx2{2u}; + Index idx3{idx1}; + + ASSERT_EQ(idx1, 1); + ASSERT_EQ(idx1, 1u); + ASSERT_EQ(idx1.value(), 1u); + ASSERT_NE(idx1, idx2); + ASSERT_EQ(idx1, idx3); +} diff --git a/runtime/neurun/test/graph/MockNode.h b/runtime/neurun/test/graph/MockNode.h new file mode 100644 index 000000000..67f4c049d --- /dev/null +++ b/runtime/neurun/test/graph/MockNode.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_TEST_GRAPH_MOCK_NODE_H__ +#define __NEURUN_TEST_GRAPH_MOCK_NODE_H__ + +#include "ir/Operation.h" +#include "ir/OperandIndexSequence.h" + +namespace neurun_test +{ +namespace ir +{ + +class SimpleMock : public neurun::ir::Operation +{ +public: + SimpleMock(const neurun::ir::OperandIndexSequence &inputs, + const neurun::ir::OperandIndexSequence &outputs) + : Operation{neurun::ir::OperandConstraint::createAny()} + { + setInputs(inputs); + setOutputs(outputs); + } + +public: + void accept(neurun::ir::OperationVisitor &) const override {} + neurun::ir::OpCode opcode() const final { return neurun::ir::OpCode::Invalid; } +}; + +} // namespace ir +} // namespace neurun_test + +#endif // __NEURUN_TEST_GRAPH_MOCK_NODE_H__ diff --git a/runtime/neurun/test/graph/operand/IndexSet.cc b/runtime/neurun/test/graph/operand/IndexSet.cc new file mode 100644 index 000000000..969290fe1 --- /dev/null +++ b/runtime/neurun/test/graph/operand/IndexSet.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include "ir/OperandIndexSequence.h" + +using neurun::ir::OperandIndex; +using neurun::ir::OperandIndexSequence; + +TEST(graph_OperandIndexSequence, append) +{ + OperandIndexSequence iset{0, 2, 4, 8}; + + ASSERT_EQ(iset.size(), 4); + + iset.append(OperandIndex{10}); + + ASSERT_EQ(iset.size(), 5); + + neurun::ir::IOIndex index1{1}; + neurun::ir::IOIndex index2{4}; + + ASSERT_EQ(iset.at(index1), 2); + ASSERT_EQ(iset.at(index2), 10); + + ASSERT_TRUE(iset.contains(OperandIndex{2})); + ASSERT_TRUE(iset.contains(OperandIndex{10})); + ASSERT_FALSE(iset.contains(OperandIndex{11})); +} + +TEST(graph_OperandIndexSequence, replace) +{ + OperandIndexSequence iset{0, 1, 2, 3}; + + iset.replace(OperandIndex{1}, OperandIndex{9}); + ASSERT_FALSE(iset.contains(OperandIndex{1})); + ASSERT_TRUE(iset.contains(OperandIndex{9})); +} diff --git a/runtime/neurun/test/graph/operand/LayoutSet.cc b/runtime/neurun/test/graph/operand/LayoutSet.cc new file mode 100644 index 000000000..7b0dcc9f7 --- /dev/null +++ b/runtime/neurun/test/graph/operand/LayoutSet.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include "ir/LayoutSet.h" + +using neurun::ir::Layout; +using neurun::ir::LayoutSet; + +TEST(graph_operand_LayoutSet, layout_set_operators) +{ + LayoutSet set1{Layout::NCHW}; + LayoutSet set2{Layout::NHWC}; + LayoutSet set3 = set1 | set2; + + ASSERT_EQ(set3.size(), 2); + + ASSERT_EQ((set3 - set1).size(), 1); + ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true); + ASSERT_EQ((set3 - set2).size(), 1); + ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true); + ASSERT_EQ((set3 - set3).size(), 0); + + ASSERT_EQ((set3 & set1).size(), 1); + ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true); + ASSERT_EQ((set3 & set2).size(), 1); + ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true); + ASSERT_EQ((set1 & set2).size(), 0); +} diff --git a/runtime/neurun/test/graph/operand/Set.cc b/runtime/neurun/test/graph/operand/Set.cc new file mode 100644 index 000000000..e30a5b7f7 --- /dev/null +++ b/runtime/neurun/test/graph/operand/Set.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include "ir/Operands.h" + +TEST(graph_operand_Set, set_test) +{ + neurun::ir::Operands set; + + neurun::ir::Shape shape0{1, 2, 3}; + + neurun::ir::Shape shape1(4); + shape1.dim(0) = 10; + shape1.dim(1) = 20; + shape1.dim(2) = 30; + shape1.dim(3) = 40; + + neurun::ir::TypeInfo type{neurun::ir::DataType::INT32}; + + set.emplace(shape0, type); + set.emplace(shape1, type); + + ASSERT_EQ(set.exist(neurun::ir::OperandIndex{0u}), true); + ASSERT_EQ(set.exist(neurun::ir::OperandIndex{1u}), true); + ASSERT_EQ(set.exist(neurun::ir::OperandIndex{2u}), false); + + ASSERT_EQ(set.at(neurun::ir::OperandIndex{0u}).shape().dim(0), 1); + ASSERT_EQ(set.at(neurun::ir::OperandIndex{0u}).shape().dim(1), 2); + ASSERT_EQ(set.at(neurun::ir::OperandIndex{0u}).shape().dim(2), 3); +} diff --git a/runtime/neurun/test/graph/operand/UseDef.cc b/runtime/neurun/test/graph/operand/UseDef.cc new file mode 100644 index 000000000..c59032824 --- /dev/null +++ b/runtime/neurun/test/graph/operand/UseDef.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include "ir/Graph.h" +#include "ir/verifier/Verifier.h" +#include "cpp14/memory.h" +#include "../MockNode.h" + +#include <typeindex> + +namespace +{ + +using IndexSet = neurun::ir::OperandIndexSequence; +using Mock = neurun_test::ir::SimpleMock; + +} // namespace anonymous + +TEST(graph_operand_usedef, usedef_test) +{ + neurun::ir::Graph graph; + neurun::ir::verifier::DAGChecker verifier; + + neurun::ir::Shape shape(3); + neurun::ir::TypeInfo type{neurun::ir::DataType::INT32}; + + // Model Input/Output + auto input_operand = graph.addOperand(shape, type); + auto output_operand = graph.addOperand(shape, type); + + graph.addInput(input_operand); + graph.addOutput(output_operand); + + // MockNode1 + auto operand_index1 = graph.addOperand(shape, type); + auto mocknode_index1 = graph.addOperation( + nnfw::cpp14::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1})); + + // MockNode2 + auto operand_index2 = graph.addOperand(shape, type); + auto mocknode_index2 = graph.addOperation( + nnfw::cpp14::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2})); + + // MockNode3(two input) + auto multiinput_index = graph.addOperation(nnfw::cpp14::make_unique<Mock>( + IndexSet{operand_index1, operand_index2}, IndexSet{output_operand})); + + graph.finishBuilding(); + + ASSERT_EQ(verifier.verify(graph), true); + + // Check def + ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index1), true); + ASSERT_EQ(graph.operands().at(operand_index2).getDef().contains(mocknode_index2), true); + ASSERT_EQ(graph.operands().at(output_operand).getDef().contains(multiinput_index), true); + + ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index2), false); + ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(multiinput_index), false); + + // Check use + ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true); + 
ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true); + ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false); + ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true); + ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true); + + ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2); + ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1); + ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0); +} diff --git a/runtime/neurun/test/graph/operation/Set.cc b/runtime/neurun/test/graph/operation/Set.cc new file mode 100644 index 000000000..fb3d54298 --- /dev/null +++ b/runtime/neurun/test/graph/operation/Set.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include "../MockNode.h" +#include "ir/Operations.h" + +using neurun::ir::Operations; +using neurun::ir::Operation; +using neurun::ir::OperationIndex; + +TEST(graph_operation_Set, operation_test) +{ + Operations ops; + ops.push(std::unique_ptr<Operation>(new neurun_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7}))); + OperationIndex idx{0u}; + ASSERT_EQ(ops.at(idx).getInputs().size(), 4); + ASSERT_EQ(ops.at(idx).getOutputs().size(), 3); +} diff --git a/runtime/neurun/test/graph/operation/SetIO.cc b/runtime/neurun/test/graph/operation/SetIO.cc new file mode 100644 index 000000000..110accfac --- /dev/null +++ b/runtime/neurun/test/graph/operation/SetIO.cc @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/Concat.h"
+
+#include <cpp14/memory.h>
+
+#include <stdexcept>
+
+using Index = neurun::ir::IOIndex;
+using IndexSet = neurun::ir::OperandIndexSequence;
+
+TEST(graph_operation_setIO, operation_setIO_conv) // setInputs() must rebind an op's input operand indices
+{
+  neurun::ir::Graph graph;
+
+  neurun::ir::Shape shape{3};
+  neurun::ir::TypeInfo type{neurun::ir::DataType::INT32};
+
+  // Add Conv
+  using Graph = neurun::ir::operation::Conv2D; // NOTE(review): local alias named "Graph" shadows ir::Graph — easy to misread
+
+  auto input_operand = graph.addOperand(shape, type);
+  auto kernel_operand = graph.addOperand(shape, type);
+  auto bias_operand = graph.addOperand(shape, type);
+  IndexSet inputs{input_operand, kernel_operand, bias_operand}; // Conv2D takes input, kernel, bias
+
+  Graph::Param conv_params;
+  conv_params.padding.type = neurun::ir::PaddingType::SAME;
+  conv_params.stride.horizontal = 1;
+  conv_params.stride.vertical = 1;
+  conv_params.activation = neurun::ir::Activation::NONE;
+
+  auto output_operand = graph.addOperand(shape, type).value();
+  IndexSet outputs{output_operand};
+
+  auto conv = nnfw::cpp14::make_unique<Graph>(inputs, outputs, conv_params);
+
+  ASSERT_NE(conv, nullptr);
+  ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value()); // inputs start as constructed
+  conv->setInputs({8, 9, 10}); // rebind all three inputs to fresh operand indices
+  ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value()); // old index no longer matches
+  ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8); // new index is visible
+}
+
+TEST(graph_operation_setIO, operation_setIO_concat) // setInputs() may also change the number of inputs
+{
+  neurun::ir::Graph graph;
+
+  neurun::ir::Shape shape{3};
+
+  neurun::ir::TypeInfo type{neurun::ir::DataType::INT32};
+
+  using Graph = neurun::ir::operation::Concat; // NOTE(review): same shadowing alias as the conv test above
+
+  // Add Concat
+  IndexSet inputs;
+  for (int i = 0; i < 6; ++i)
+  {
+    inputs.append(graph.addOperand(shape, type)); // six input operands
+  }
+
+  Graph::Param concat_params{0}; // presumably axis = 0 — confirm Concat::Param field order
+
+  auto output_operand = graph.addOperand(shape, type).value();
+  IndexSet outputs{output_operand};
+
+  auto concat = nnfw::cpp14::make_unique<Graph>(inputs, outputs, concat_params);
+
+  ASSERT_NE(concat, nullptr);
+  ASSERT_EQ(concat->getInputs().size(), 6);
+  ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+
+  concat->setInputs({80, 6, 9, 11}); // shrink from 6 inputs to 4
+  ASSERT_EQ(concat->getInputs().size(), 4);
+  ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+  ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
+  ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
+  ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range); // index 5 is now past the end
+}
diff --git a/runtime/neurun/test/graph/verifier/Verifier.cc b/runtime/neurun/test/graph/verifier/Verifier.cc
new file mode 100644
index 000000000..cbda31dfe
--- /dev/null
+++ b/runtime/neurun/test/graph/verifier/Verifier.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Operation.h"
+#include "ir/Graph.h"
+#include "ir/verifier/Verifier.h"
+#include "cpp14/memory.h"
+#include "ir/Operand.h"
+#include "../MockNode.h"
+
+using IndexSet = neurun::ir::OperandIndexSequence;
+using Mock = neurun_test::ir::SimpleMock;
+
+TEST(Verifier, dag_checker) // minimal graph (input -> mock op -> output) must pass the DAG check
+{
+  neurun::ir::Graph graph;
+
+  neurun::ir::Shape shape{3};
+  neurun::ir::TypeInfo type{neurun::ir::DataType::INT32};
+
+  auto operand1 = graph.addOperand(shape, type);
+  auto operand2 = graph.addOperand(shape, type);
+
+  graph.addInput(operand1);  // operand1 is the graph input
+  graph.addOutput(operand2); // operand2 is the graph output
+
+  graph.addOperation(nnfw::cpp14::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2})); // single op consuming operand1, producing operand2
+
+  graph.finishBuilding(); // done building before verification runs
+
+  neurun::ir::verifier::DAGChecker verifier;
+
+  ASSERT_EQ(verifier.verify(graph), true); // one-op graph cannot contain a cycle
+}
diff --git a/runtime/neurun/test/util/ShapeInference.cc b/runtime/neurun/test/util/ShapeInference.cc
new file mode 100644
index 000000000..a8dea3a79
--- /dev/null
+++ b/runtime/neurun/test/util/ShapeInference.cc
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Layout.h"
+#include "util/ShapeInference.h"
+
+using namespace neurun::ir;
+
+TEST(ShapeInference, Elementwise) // broadcasting: {1,299,299,3} (+) {3} -> {1,299,299,3}
+{
+  Shape lhs_shape{1, 299, 299, 3};
+  Shape rhs_shape{3}; // rank-1 rhs broadcasts over the channel dim
+  auto infered_shapes = neurun::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+  auto infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4); // output keeps the larger rank
+  ASSERT_EQ(infered_out_shape.dim(0), 1);
+  ASSERT_EQ(infered_out_shape.dim(1), 299);
+  ASSERT_EQ(infered_out_shape.dim(2), 299);
+  ASSERT_EQ(infered_out_shape.dim(3), 3);
+}
+
+TEST(ShapeInference, IncorrectElementwise) // {5,3} is not broadcastable against {1,299,299,3}
+{
+  Shape lhs_shape{1, 299, 299, 3};
+  Shape rhs_shape{5, 3};
+  ASSERT_THROW(neurun::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape),
+               std::runtime_error); // incompatible shapes must be reported, not silently accepted
+}
+
+TEST(ShapeInference, Pool2DNodeSame) // SAME padding: out = ceil(in / stride)
+{
+  Shape in_shape{10, 6, 12, 20}; // NHWC: N=10, H=6, W=12, C=20
+  Stride stride{3, 7};           // assumed (vertical=3, horizontal=7) — confirm Stride field order
+  Padding padding{PaddingType::SAME};
+
+  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE}; // kernel 3x6 — confirm (kh, kw) order in Param
+  auto infered_shapes = neurun::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  auto infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); // batch is untouched by pooling
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);  // ceil(6 / 3)
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);  // ceil(12 / 7)
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); // channels are untouched by pooling
+
+  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE}; // max pool must infer the same shape as avg pool
+  infered_shapes = neurun::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeValid) // VALID padding: out = (in - kernel) / stride + 1
+{
+  Shape in_shape{10, 6, 12, 20};
+  Stride stride{3, 7};
+  Padding padding{PaddingType::VALID};
+
+  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
+  auto infered_shapes = neurun::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  auto infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); // (6 - 3) / 3 + 1
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); // (12 - 6) / 7 + 1
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE}; // same expectation for max pooling
+  infered_shapes = neurun::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeExplicit) // EXPLICIT padding with per-edge pad amounts
+{
+  Shape in_shape{10, 3, 5, 20}; // smaller spatial dims than the SAME/VALID cases
+
+  Stride stride{3, 7};
+  Padding padding{PaddingType::EXPLICIT, {4, 3, 2, 1}}; // explicit pad values — confirm (left, right, top, bottom) order
+
+  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
+  auto infered_shapes = neurun::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  auto infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); // expected dims given the explicit pads above
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE}; // same expectation for max pooling
+  infered_shapes = neurun::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Conv2D) // conv output channels come from the filter count
+{
+  Shape in_shape{10, 6, 12, 20}; // NHWC input
+  Shape ker_shape{30, 3, 6, 20}; // 30 filters of 3x6 over 20 input channels
+
+  operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
+  auto infered_shapes = neurun::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+  auto infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);  // VALID: (6 - 3) / 3 + 1
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);  // VALID: (12 - 6) / 7 + 1
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); // = filter count
+
+  param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
+  infered_shapes = neurun::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+  infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); // SAME: ceil(6 / 3)
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); // SAME: ceil(12 / 7)
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+  param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::EXPLICIT, {4, 3, 2, 1}},
+                                   Activation::NONE};
+  infered_shapes = neurun::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+  infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3); // explicit pads enlarge the output vs VALID
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+}
+
+TEST(ShapeInference, DepthwiseConv2D) // out channels = in channels (20) * multiplier (3) = 60
+{
+  Shape in_shape{10, 6, 12, 20};
+  Shape ker_shape{1, 3, 6, 60}; // depthwise kernel: 1 x kh x kw x (C_in * multiplier)
+
+  operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
+                                          Activation::NONE}; // 3 = channel multiplier
+  auto infered_shapes =
+      neurun::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+  auto infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);  // VALID: (6 - 3) / 3 + 1
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);  // VALID: (12 - 6) / 7 + 1
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); // 20 channels * multiplier 3
+
+  param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
+                                            Activation::NONE};
+  infered_shapes = neurun::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+  infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); // SAME: ceil(6 / 3)
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); // SAME: ceil(12 / 7)
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+  param = operation::DepthwiseConv2D::Param{
+      Stride{3, 7}, Padding{PaddingType::EXPLICIT, {4, 3, 2, 1}}, 3, Activation::NONE};
+  infered_shapes = neurun::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+  infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 4);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3); // explicit pads enlarge the output vs VALID
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+}
+
+TEST(ShapeInference, Concat) // concat along axis 3: 3 + 2 + 2 = 7; other dims unchanged
+{
+  Shape in1{10, 20, 30, 3, 50};
+  Shape in2{10, 20, 30, 2, 50};
+  Shape in3{10, 20, 30, 2, 50};
+
+  operation::Concat::Param param{3}; // axis = 3
+  auto infered_shapes = neurun::shape_inference::inferConcatShape({in1, in2, in3}, param);
+  auto infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 5);
+  ASSERT_EQ(infered_out_shape.dim(0), 10);
+  ASSERT_EQ(infered_out_shape.dim(1), 20);
+  ASSERT_EQ(infered_out_shape.dim(2), 30);
+  ASSERT_EQ(infered_out_shape.dim(3), 7); // summed concat axis
+  ASSERT_EQ(infered_out_shape.dim(4), 50);
+}
+
+TEST(ShapeInference, FullyConnected) // non-2D input is flattened against the weight's input width
+{
+  Shape in_shape{3, 4, 5, 6}; // 3*4*5*6 = 360 elements
+  Shape ker_shape{3, 10};     // presumably {units, input_size} — TODO confirm weight layout
+  auto infered_shapes = neurun::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
+  auto infered_out_shape = infered_shapes[0];
+
+  ASSERT_EQ(infered_out_shape.rank(), 2);
+  ASSERT_EQ(infered_out_shape.dim(0), 36); // 360 elements / input_size 10
+  ASSERT_EQ(infered_out_shape.dim(1), 3);  // = ker dim(0)
+}